Fix hang on AMD DirectML by changing output-tensor reads from GetTensorData<T>() to GetTensorMutableData<T>()
This commit is contained in:
@@ -117,7 +117,35 @@
|
|||||||
"Bash(NODE_PATH=\"C:/home/alex/.npm-global/node_modules\" node build_anslib_logging_guide.js)",
|
"Bash(NODE_PATH=\"C:/home/alex/.npm-global/node_modules\" node build_anslib_logging_guide.js)",
|
||||||
"Bash(python \"C:/Users/nghia/AppData/Roaming/Claude/local-agent-mode-sessions/skills-plugin/d8e35aa4-a14e-4e20-b921-ba1b9a3cce86/cdda7cc8-a1c7-42ff-98b4-473ec3e8b9fb/skills/docx/scripts/office/validate.py\" \"C:/Projects/CLionProjects/ANSCORE/docs/ANSLIB_Logging_Guide.docx\")",
|
"Bash(python \"C:/Users/nghia/AppData/Roaming/Claude/local-agent-mode-sessions/skills-plugin/d8e35aa4-a14e-4e20-b921-ba1b9a3cce86/cdda7cc8-a1c7-42ff-98b4-473ec3e8b9fb/skills/docx/scripts/office/validate.py\" \"C:/Projects/CLionProjects/ANSCORE/docs/ANSLIB_Logging_Guide.docx\")",
|
||||||
"Bash(python \"C:/Users/nghia/AppData/Roaming/Claude/local-agent-mode-sessions/skills-plugin/d8e35aa4-a14e-4e20-b921-ba1b9a3cce86/cdda7cc8-a1c7-42ff-98b4-473ec3e8b9fb/skills/docx/scripts/office/soffice.py\" --headless --convert-to pdf --outdir \"C:/Projects/CLionProjects/ANSCORE/docs\" \"C:/Projects/CLionProjects/ANSCORE/docs/ANSLIB_Logging_Guide.docx\")",
|
"Bash(python \"C:/Users/nghia/AppData/Roaming/Claude/local-agent-mode-sessions/skills-plugin/d8e35aa4-a14e-4e20-b921-ba1b9a3cce86/cdda7cc8-a1c7-42ff-98b4-473ec3e8b9fb/skills/docx/scripts/office/soffice.py\" --headless --convert-to pdf --outdir \"C:/Projects/CLionProjects/ANSCORE/docs\" \"C:/Projects/CLionProjects/ANSCORE/docs/ANSLIB_Logging_Guide.docx\")",
|
||||||
"Bash(sort -t: -k1 -u)"
|
"Bash(sort -t: -k1 -u)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE diff core/ANSLicensingSystem/ANSLicense.cpp)",
|
||||||
|
"Bash(awk 'NR>=154022 && NR<=156428 {print}' \"C:/Users/nghia/Downloads/AVNET-8845HS2.log\")",
|
||||||
|
"Bash(awk 'NR>=156350 && NR<=156428 {print}' \"C:/Users/nghia/Downloads/AVNET-8845HS2.log\")",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE log --oneline -10 -- modules/ANSFR/OpenVINODeviceConfig.h modules/ANSFR/OpenVINODeviceConfig.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE log --oneline -5 -- core/ANSLibsLoader/EPLoader.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show --stat 69787b0)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show --stat 97d8149)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show 97d8149 -- engines/ONNXEngine/OpenVINODeviceConfig.h modules/ANSFR/ANSFR.cpp)",
|
||||||
|
"Bash(awk 'NR>=7106 && NR<=7250 && /ONNXYOLO|ANSCUSTOM|DetectObjects/ {print}' \"C:/Users/nghia/Downloads/AVNET-8845HS5.log\")",
|
||||||
|
"Bash(awk 'NR>=5850 && NR<=7115 && /ONNXYOLO|ANSCUSTOM_Infer|ANSONNXCL_pp|cls\\\\] calling/' \"C:/Users/nghia/Downloads/AVNET-8845HS5.log\")",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show --stat 3a527d2)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show 3a527d2 -- modules/ANSODEngine/ANSCUSTOMDETECTOR.cpp modules/ANSODEngine/ANSONNXYOLO.cpp modules/ANSODEngine/ANSONNXCL.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show 3a527d2 -- modules/ANSODEngine/ANSEngineCommon.h)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE diff HEAD modules/ANSODEngine/ANSONNXYOLO.cpp modules/ANSODEngine/ANSONNXCL.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE diff HEAD modules/ANSODEngine/ANSONNXYOLO.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE diff HEAD modules/ANSODEngine/ANSONNXCL.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE log --all --oneline --since='3 weeks ago' -- modules/ANSODEngine/ANSONNXYOLO.cpp modules/ANSODEngine/ANSONNXCL.cpp engines/ONNXEngine/ONNXEngine.cpp engines/ONNXEngine/ONNXEngine.h)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show --stat 844d739)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE log -p --grep=\"mutex\\\\|lock\\\\|hang\\\\|stall\\\\|concurrent\\\\|thread\" --since=\"3 weeks ago\" -- modules/ANSODEngine/ANSONNXYOLO.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE show 844d739 -- modules/ANSODEngine/ANSONNXYOLO.cpp)",
|
||||||
|
"Bash(awk '$2>=835 && $2<=860 && /1853990010|ANSCUSTOM|ANSONNXCL|ANSONNXYOLO/' \"C:/Users/nghia/Downloads/AVNET-8845HS7.log\")",
|
||||||
|
"Bash(awk '$2>=836 && $2<=837 && /ANSCUSTOM|ANSONNXCL/' \"C:/Users/nghia/Downloads/AVNET-8845HS7.log\")",
|
||||||
|
"Bash(awk '$2>=836.0 && $2<=836.9' \"C:/Users/nghia/Downloads/AVNET-8845HS7.log\")",
|
||||||
|
"Bash(grep -nB1 -A3 \"DirectML\\\\|DisableMemPattern\\\\|SetExecutionMode\\\\|ExtendedOptimization\\\\|ORT_SEQUENTIAL\" \"C:/Projects/CLionProjects/ANSCORE/engines/ONNXEngine/ONNXEngine.cpp\")",
|
||||||
|
"Bash(grep -nE \"^\\\\}$|warmupModel\\\\\\(|#if 0|#endif|ANSONNXCL_legacy_Init\" C:/Projects/CLionProjects/ANSCORE/modules/ANSODEngine/ANSONNXCL.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE restore modules/ANSODEngine/ANSONNXCL.h modules/ANSODEngine/ANSONNXCL.cpp)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE status --short modules/)",
|
||||||
|
"Bash(git -C C:/Projects/CLionProjects/ANSCORE diff --stat HEAD modules/ANSODEngine/ANSONNXCL.cpp modules/ANSODEngine/ANSONNXOBB.cpp modules/ANSODEngine/ANSONNXPOSE.cpp modules/ANSODEngine/ANSONNXSEG.cpp modules/ANSODEngine/ANSYOLO12OD.cpp engines/ONNXEngine/ONNXEngine.cpp engines/ONNXEngine/ONNXSAM3.cpp)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -80,6 +80,17 @@ namespace {
|
|||||||
return s.substr(0, 4) + std::string(s.size() - 8, '*') + s.substr(s.size() - 4);
|
return s.substr(0, 4) + std::string(s.size() - 8, '*') + s.substr(s.size() - 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SDK identifier passed to ANSLSHelper for licensing-service auth. Despite
|
||||||
|
// the variable name "privateKey" used at call sites, this is an 11-byte
|
||||||
|
// SDK token that identifies the ANSCENTER product to the licensing
|
||||||
|
// service — NOT a cryptographic signing key. Centralised here so any
|
||||||
|
// future rotation (or move to runtime lookup via env var / config file)
|
||||||
|
// touches one place instead of duplicated literals scattered across the
|
||||||
|
// file. Narrow form is the source of truth; wide form is derived to
|
||||||
|
// avoid two literals drifting out of sync.
|
||||||
|
// Returns the narrow-character SDK identifier string. The literal lives
// only here; each call hands back a freshly constructed std::string.
inline std::string GetSdkKey() {
    const char* const sdkKeyLiteral = "AQlSAiRTNtS7X20=";
    return std::string(sdkKeyLiteral);
}
|
||||||
|
// Wide-character form of the SDK identifier. It is always derived from
// GetSdkKey() via String2WString, so there is no second literal to maintain.
inline std::wstring GetSdkKeyW() {
    std::string narrowKey = GetSdkKey();
    return String2WString(narrowKey);
}
|
||||||
|
|
||||||
// Append both `\` and `/` slash flavours of `raw` to `dst`, lower-cased
|
// Append both `\` and `/` slash flavours of `raw` to `dst`, lower-cased
|
||||||
// and with a trailing separator forced. Empty / pathologically short
|
// and with a trailing separator forced. Empty / pathologically short
|
||||||
// entries are skipped.
|
// entries are skipped.
|
||||||
@@ -661,7 +672,7 @@ namespace ANSCENTER
|
|||||||
try {
|
try {
|
||||||
int _enableFeature;
|
int _enableFeature;
|
||||||
int _productId;
|
int _productId;
|
||||||
std::string privateKey = "AQlSAiRTNtS7X20=";
|
std::string privateKey = GetSdkKey();
|
||||||
int activationResult = 0;
|
int activationResult = 0;
|
||||||
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
||||||
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
||||||
@@ -775,7 +786,7 @@ namespace ANSCENTER
|
|||||||
try {
|
try {
|
||||||
int productId;
|
int productId;
|
||||||
std::string registrationName;
|
std::string registrationName;
|
||||||
std::string privateKey = "AQlSAiRTNtS7X20=";
|
std::string privateKey = GetSdkKey();
|
||||||
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
||||||
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
||||||
ansHelper.SetupLicenseTemplate();
|
ansHelper.SetupLicenseTemplate();
|
||||||
@@ -836,7 +847,7 @@ namespace ANSCENTER
|
|||||||
int ANSLicenseHelper::DeactivateLicense(std::string productName) {
|
int ANSLicenseHelper::DeactivateLicense(std::string productName) {
|
||||||
std::string licenseDirectory = GetLicenseDir();
|
std::string licenseDirectory = GetLicenseDir();
|
||||||
std::vector<std::string> licenseKeyFiles = ListFilesInFolder(licenseDirectory);
|
std::vector<std::string> licenseKeyFiles = ListFilesInFolder(licenseDirectory);
|
||||||
std::string privateKey = "AQlSAiRTNtS7X20=";
|
std::string privateKey = GetSdkKey();
|
||||||
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
||||||
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
||||||
ansHelper.SetupLicenseTemplate();
|
ansHelper.SetupLicenseTemplate();
|
||||||
@@ -901,7 +912,7 @@ namespace ANSCENTER
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
int ANSLicenseHelper::ActivateLicense(std::string productName, std::string licenseKey) {
|
int ANSLicenseHelper::ActivateLicense(std::string productName, std::string licenseKey) {
|
||||||
std::string privateKey = "AQlSAiRTNtS7X20=";
|
std::string privateKey = GetSdkKey();
|
||||||
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
||||||
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
||||||
ansHelper.SetupLicenseTemplate();
|
ansHelper.SetupLicenseTemplate();
|
||||||
@@ -942,7 +953,7 @@ namespace ANSCENTER
|
|||||||
}
|
}
|
||||||
|
|
||||||
int ANSLicenseHelper::ActivateLicenseWithCustomHWID(std::string productName, std::string licenseKey, std::string hwid, std::string &activationKey) {
|
int ANSLicenseHelper::ActivateLicenseWithCustomHWID(std::string productName, std::string licenseKey, std::string hwid, std::string &activationKey) {
|
||||||
std::string privateKey = "AQlSAiRTNtS7X20=";
|
std::string privateKey = GetSdkKey();
|
||||||
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
||||||
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
||||||
ansHelper.SetupLicenseTemplate();
|
ansHelper.SetupLicenseTemplate();
|
||||||
@@ -1020,7 +1031,7 @@ namespace ANSCENTER
|
|||||||
|
|
||||||
std::string licenseDirectory = GetLicenseDir();
|
std::string licenseDirectory = GetLicenseDir();
|
||||||
std::vector<std::string> licenseKeyFiles = ListFilesInFolder(licenseDirectory);
|
std::vector<std::string> licenseKeyFiles = ListFilesInFolder(licenseDirectory);
|
||||||
std::string privateKey = "AQlSAiRTNtS7X20=";
|
std::string privateKey = GetSdkKey();
|
||||||
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
std::string licensingServiceURL = "https://licensingservice.anscenter.com/";
|
||||||
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
ANSCENTER::ANSLSHelper ansHelper(privateKey, licensingServiceURL);
|
||||||
ansHelper.SetupLicenseTemplate();
|
ansHelper.SetupLicenseTemplate();
|
||||||
@@ -1224,7 +1235,7 @@ namespace ANSCENTER
|
|||||||
ANSLSHelper::ANSLSHelper()
|
ANSLSHelper::ANSLSHelper()
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
this->_privateKey = _T("AQlSAiRTNtS7X20=");
|
this->_privateKey = GetSdkKeyW();
|
||||||
this->_licenseServiceURL = _T("https://licensingservice.anscenter.com/");
|
this->_licenseServiceURL = _T("https://licensingservice.anscenter.com/");
|
||||||
this->_sdkLicenseKey = _T("MYNSU-GBQ2Q-SF5U5-S3RVF-5ZKFD");
|
this->_sdkLicenseKey = _T("MYNSU-GBQ2Q-SF5U5-S3RVF-5ZKFD");
|
||||||
SDKRegistration::SetLicenseKey(_sdkLicenseKey.c_str());
|
SDKRegistration::SetLicenseKey(_sdkLicenseKey.c_str());
|
||||||
@@ -1239,7 +1250,7 @@ namespace ANSCENTER
|
|||||||
ANSLSHelper::ANSLSHelper(std::string licenseServiceURL)
|
ANSLSHelper::ANSLSHelper(std::string licenseServiceURL)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
this->_privateKey = _T("AQlSAiRTNtS7X20=");
|
this->_privateKey = GetSdkKeyW();
|
||||||
this->_licenseServiceURL = String2WString(licenseServiceURL);
|
this->_licenseServiceURL = String2WString(licenseServiceURL);
|
||||||
this->_sdkLicenseKey = _T("MYNSU-GBQ2Q-SF5U5-S3RVF-5ZKFD");
|
this->_sdkLicenseKey = _T("MYNSU-GBQ2Q-SF5U5-S3RVF-5ZKFD");
|
||||||
SDKRegistration::SetLicenseKey(_sdkLicenseKey.c_str());
|
SDKRegistration::SetLicenseKey(_sdkLicenseKey.c_str());
|
||||||
@@ -1255,7 +1266,7 @@ namespace ANSCENTER
|
|||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
if (privateKey.empty()) {
|
if (privateKey.empty()) {
|
||||||
this->_privateKey = _T("AQlSAiRTNtS7X20=");
|
this->_privateKey = GetSdkKeyW();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this->_privateKey = String2WString(privateKey);
|
this->_privateKey = String2WString(privateKey);
|
||||||
@@ -1278,7 +1289,7 @@ namespace ANSCENTER
|
|||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
if (privateKey.empty()) {
|
if (privateKey.empty()) {
|
||||||
this->_privateKey = _T("AQlSAiRTNtS7X20=");
|
this->_privateKey = GetSdkKeyW();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this->_privateKey = String2WString(privateKey);
|
this->_privateKey = String2WString(privateKey);
|
||||||
@@ -1517,7 +1528,7 @@ namespace ANSCENTER
|
|||||||
this->_licenseTemplate->SetDataSize(36); //16 bits for ProductId, 4 bits for FeatureBitMask, and 16 bits for ExpirationData (16+4+16 =36)
|
this->_licenseTemplate->SetDataSize(36); //16 bits for ProductId, 4 bits for FeatureBitMask, and 16 bits for ExpirationData (16+4+16 =36)
|
||||||
this->_licenseTemplate->SetTemplateId(_T("24880"));
|
this->_licenseTemplate->SetTemplateId(_T("24880"));
|
||||||
if(this->_privateKey.empty())
|
if(this->_privateKey.empty())
|
||||||
this->_licenseTemplate->SetPrivateKey(_T("AQlSAiRTNtS7X20="));
|
this->_licenseTemplate->SetPrivateKey(GetSdkKeyW().c_str());
|
||||||
else this->_licenseTemplate->SetPrivateKey(this->_privateKey.c_str());
|
else this->_licenseTemplate->SetPrivateKey(this->_privateKey.c_str());
|
||||||
|
|
||||||
if (this->_publicKey.empty())
|
if (this->_publicKey.empty())
|
||||||
|
|||||||
Binary file not shown.
@@ -819,7 +819,10 @@ namespace ANSCENTER {
|
|||||||
input_values_handler.clear();
|
input_values_handler.clear();
|
||||||
input_values_handler.shrink_to_fit();
|
input_values_handler.shrink_to_fit();
|
||||||
|
|
||||||
const float* vals = output_tensors[0].GetTensorData<float>();
|
// GetTensorMutableData on DirectML — the const GetTensorData
|
||||||
|
// triggers a per-call host-readable mapping that on AMD DML
|
||||||
|
// exhausts a staging-buffer pool after ~8 calls and hangs.
|
||||||
|
const float* vals = output_tensors[0].GetTensorMutableData<float>();
|
||||||
const unsigned int hidden_dim =
|
const unsigned int hidden_dim =
|
||||||
static_cast<unsigned int>(output_node_dims.at(0).at(1));
|
static_cast<unsigned int>(output_node_dims.at(0).at(1));
|
||||||
|
|
||||||
@@ -1377,7 +1380,9 @@ namespace ANSCENTER {
|
|||||||
Ort::RunOptions{ nullptr },
|
Ort::RunOptions{ nullptr },
|
||||||
in_names, &input_tensor, 1, out_names, 1);
|
in_names, &input_tensor, 1, out_names, 1);
|
||||||
|
|
||||||
out_result = post_processing(outputs[0].GetTensorData<float>());
|
// GetTensorMutableData (not GetTensorData) — see comment in this
|
||||||
|
// file's other output-read sites; const GetTensorData hangs on AMD DML.
|
||||||
|
out_result = post_processing(outputs[0].GetTensorMutableData<float>());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ort::Value MOVINET::transform(const cv::Mat& mat)
|
Ort::Value MOVINET::transform(const cv::Mat& mat)
|
||||||
|
|||||||
@@ -463,7 +463,9 @@ namespace ANSCENTER
|
|||||||
auto info = outputs[maskIdx].GetTensorTypeAndShapeInfo();
|
auto info = outputs[maskIdx].GetTensorTypeAndShapeInfo();
|
||||||
m_cachedLangMaskShape = info.GetShape();
|
m_cachedLangMaskShape = info.GetShape();
|
||||||
size_t count = info.GetElementCount();
|
size_t count = info.GetElementCount();
|
||||||
const bool* data = outputs[maskIdx].GetTensorData<bool>();
|
// GetTensorMutableData not GetTensorData on DML — const variant
|
||||||
|
// hangs after ~8 calls. Read-only despite the "Mutable" name.
|
||||||
|
const bool* data = outputs[maskIdx].GetTensorMutableData<bool>();
|
||||||
m_cachedLangMask.resize(count);
|
m_cachedLangMask.resize(count);
|
||||||
for (size_t i = 0; i < count; ++i)
|
for (size_t i = 0; i < count; ++i)
|
||||||
m_cachedLangMask[i] = data[i] ? 1 : 0;
|
m_cachedLangMask[i] = data[i] ? 1 : 0;
|
||||||
@@ -474,7 +476,7 @@ namespace ANSCENTER
|
|||||||
auto info = outputs[featIdx].GetTensorTypeAndShapeInfo();
|
auto info = outputs[featIdx].GetTensorTypeAndShapeInfo();
|
||||||
m_cachedLangFeaturesShape = info.GetShape();
|
m_cachedLangFeaturesShape = info.GetShape();
|
||||||
size_t count = info.GetElementCount();
|
size_t count = info.GetElementCount();
|
||||||
const float* data = outputs[featIdx].GetTensorData<float>();
|
const float* data = outputs[featIdx].GetTensorMutableData<float>();
|
||||||
m_cachedLangFeatures.assign(data, data + count);
|
m_cachedLangFeatures.assign(data, data + count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -649,7 +651,7 @@ namespace ANSCENTER
|
|||||||
if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT && !shape.empty()) {
|
if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT && !shape.empty()) {
|
||||||
size_t numElems = info.GetElementCount();
|
size_t numElems = info.GetElementCount();
|
||||||
if (numElems > 0 && numElems < 100000000) {
|
if (numElems > 0 && numElems < 100000000) {
|
||||||
const float* data = decInputs[di].GetTensorData<float>();
|
const float* data = decInputs[di].GetTensorMutableData<float>();
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
for (size_t k = 0; k < numElems; ++k) sum += data[k];
|
for (size_t k = 0; k < numElems; ++k) sum += data[k];
|
||||||
double mean = sum / numElems;
|
double mean = sum / numElems;
|
||||||
@@ -661,14 +663,14 @@ namespace ANSCENTER
|
|||||||
// Print bool tensor values (for language_mask)
|
// Print bool tensor values (for language_mask)
|
||||||
else if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL && !shape.empty()) {
|
else if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL && !shape.empty()) {
|
||||||
size_t numElems = info.GetElementCount();
|
size_t numElems = info.GetElementCount();
|
||||||
const bool* data = decInputs[di].GetTensorData<bool>();
|
const bool* data = decInputs[di].GetTensorMutableData<bool>();
|
||||||
std::cout << " vals:";
|
std::cout << " vals:";
|
||||||
for (size_t k = 0; k < std::min(numElems, (size_t)32); ++k)
|
for (size_t k = 0; k < std::min(numElems, (size_t)32); ++k)
|
||||||
std::cout << " " << (int)data[k];
|
std::cout << " " << (int)data[k];
|
||||||
}
|
}
|
||||||
// Print int64 scalar value
|
// Print int64 scalar value
|
||||||
else if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64 && shape.empty()) {
|
else if (elemType == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64 && shape.empty()) {
|
||||||
const int64_t* data = decInputs[di].GetTensorData<int64_t>();
|
const int64_t* data = decInputs[di].GetTensorMutableData<int64_t>();
|
||||||
std::cout << " value=" << data[0];
|
std::cout << " value=" << data[0];
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
@@ -702,10 +704,10 @@ namespace ANSCENTER
|
|||||||
auto boxInfo = decOutputs[boxesIdx].GetTensorTypeAndShapeInfo();
|
auto boxInfo = decOutputs[boxesIdx].GetTensorTypeAndShapeInfo();
|
||||||
auto boxShape = boxInfo.GetShape();
|
auto boxShape = boxInfo.GetShape();
|
||||||
int numBoxes = (boxShape.size() >= 1) ? static_cast<int>(boxShape[0]) : 0;
|
int numBoxes = (boxShape.size() >= 1) ? static_cast<int>(boxShape[0]) : 0;
|
||||||
const float* boxesData = decOutputs[boxesIdx].GetTensorData<float>();
|
const float* boxesData = decOutputs[boxesIdx].GetTensorMutableData<float>();
|
||||||
|
|
||||||
// Get scores
|
// Get scores
|
||||||
const float* scoresData = decOutputs[scoresIdx].GetTensorData<float>();
|
const float* scoresData = decOutputs[scoresIdx].GetTensorMutableData<float>();
|
||||||
|
|
||||||
// Get masks
|
// Get masks
|
||||||
auto maskInfo = decOutputs[masksIdx].GetTensorTypeAndShapeInfo();
|
auto maskInfo = decOutputs[masksIdx].GetTensorTypeAndShapeInfo();
|
||||||
@@ -713,7 +715,7 @@ namespace ANSCENTER
|
|||||||
// masks shape: [N, 1, H, W]
|
// masks shape: [N, 1, H, W]
|
||||||
int maskH = (maskShape.size() >= 3) ? static_cast<int>(maskShape[2]) : 0;
|
int maskH = (maskShape.size() >= 3) ? static_cast<int>(maskShape[2]) : 0;
|
||||||
int maskW = (maskShape.size() >= 4) ? static_cast<int>(maskShape[3]) : 0;
|
int maskW = (maskShape.size() >= 4) ? static_cast<int>(maskShape[3]) : 0;
|
||||||
const bool* masksData = decOutputs[masksIdx].GetTensorData<bool>();
|
const bool* masksData = decOutputs[masksIdx].GetTensorMutableData<bool>();
|
||||||
|
|
||||||
m_maskH = maskH;
|
m_maskH = maskH;
|
||||||
m_maskW = maskW;
|
m_maskW = maskW;
|
||||||
|
|||||||
@@ -575,7 +575,7 @@ namespace ANSCENTER
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<Object> ANSONNXCL::postprocess(const std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
|
std::vector<Object> ANSONNXCL::postprocess(std::vector<Ort::Value>& outputTensors, const std::string& camera_id) {
|
||||||
ANS_DBG("ANSONNXCL_pp", "ENTRY tensors=%zu cam=%s this=%p",
|
ANS_DBG("ANSONNXCL_pp", "ENTRY tensors=%zu cam=%s this=%p",
|
||||||
outputTensors.size(), camera_id.c_str(), (void*)this);
|
outputTensors.size(), camera_id.c_str(), (void*)this);
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
@@ -589,8 +589,16 @@ namespace ANSCENTER
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
ANS_DBG("ANSONNXCL_pp", "GetTensorData<float>");
|
ANS_DBG("ANSONNXCL_pp", "GetTensorMutableData<float>");
|
||||||
const float* rawOutput = outputTensors[0].GetTensorData<float>();
|
// GetTensorMutableData (not GetTensorData) on DirectML. The const
|
||||||
|
// GetTensorData triggers a per-call host-readable mapping that on
|
||||||
|
// AMD DML exhausts a small staging-buffer pool after ~8 calls and
|
||||||
|
// blocks indefinitely. GetTensorMutableData returns the existing
|
||||||
|
// host-accessible pointer directly with no per-call mapping cost.
|
||||||
|
// Same pattern used by every output-tensor read in ANSONNXYOLO
|
||||||
|
// and engines/ONNXEngine. Safe on all EPs (CUDA/OpenVINO/CPU);
|
||||||
|
// we read the data only, never mutate it.
|
||||||
|
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
|
||||||
if (!rawOutput) {
|
if (!rawOutput) {
|
||||||
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN rawOutput=null");
|
ANS_DBG("ANSONNXCL_pp", "EARLY-RETURN rawOutput=null");
|
||||||
this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
|
this->_logger.LogError("ANSONNXCL::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
|
||||||
|
|||||||
@@ -28,7 +28,11 @@ namespace ANSCENTER {
|
|||||||
void warmupModel();
|
void warmupModel();
|
||||||
bool Init(const std::string& modelPath, const cv::Size& targetInputShape, bool useGPU = true);
|
bool Init(const std::string& modelPath, const cv::Size& targetInputShape, bool useGPU = true);
|
||||||
bool preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
bool preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
||||||
std::vector<Object> postprocess(const std::vector<Ort::Value>& outputTensors, const std::string& camera_id);
|
// outputTensors is non-const because GetTensorMutableData() (the
|
||||||
|
// ORT API that doesn't hang on AMD DirectML) requires a non-const
|
||||||
|
// Ort::Value receiver. See comment at the GetTensorMutableData
|
||||||
|
// call site in postprocess() for the full rationale.
|
||||||
|
std::vector<Object> postprocess(std::vector<Ort::Value>& outputTensors, const std::string& camera_id);
|
||||||
std::vector<Object> classify(const cv::Mat& image, const std::string& camera_id);
|
std::vector<Object> classify(const cv::Mat& image, const std::string& camera_id);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -1089,7 +1089,7 @@ namespace ANSCENTER {
|
|||||||
std::vector<Object> ANSONNXOBB::postprocess(
|
std::vector<Object> ANSONNXOBB::postprocess(
|
||||||
const cv::Size& originalImageSize,
|
const cv::Size& originalImageSize,
|
||||||
const cv::Size& resizedImageShape,
|
const cv::Size& resizedImageShape,
|
||||||
const std::vector<Ort::Value>& outputTensors,
|
std::vector<Ort::Value>& outputTensors,
|
||||||
int topk,
|
int topk,
|
||||||
const std::string& camera_id)
|
const std::string& camera_id)
|
||||||
{
|
{
|
||||||
@@ -1103,8 +1103,10 @@ namespace ANSCENTER {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract output tensor data and shape [1, num_features, num_detections]
|
// Extract output tensor data and shape [1, num_features, num_detections].
|
||||||
const float* rawOutput = outputTensors[0].GetTensorData<float>();
|
// GetTensorMutableData (not GetTensorData) on DML — const variant
|
||||||
|
// hangs on AMD after ~8 calls. Read-only despite the name.
|
||||||
|
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
|
||||||
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
|
||||||
if (outputShape.size() < 3) {
|
if (outputShape.size() < 3) {
|
||||||
|
|||||||
@@ -74,10 +74,12 @@ namespace ANSCENTER {
|
|||||||
void warmupModel();
|
void warmupModel();
|
||||||
bool Init(const std::string& modelPath, bool useGPU=true, int deviceId = 0);
|
bool Init(const std::string& modelPath, bool useGPU=true, int deviceId = 0);
|
||||||
cv::Mat preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
cv::Mat preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
||||||
|
// outputTensors is non-const because GetTensorMutableData() requires
|
||||||
|
// a non-const Ort::Value receiver — see ANSONNXCL.h for full note.
|
||||||
std::vector<Object> postprocess(
|
std::vector<Object> postprocess(
|
||||||
const cv::Size& originalImageSize,
|
const cv::Size& originalImageSize,
|
||||||
const cv::Size& resizedImageShape,
|
const cv::Size& resizedImageShape,
|
||||||
const std::vector<Ort::Value>& outputTensors, int topk,
|
std::vector<Ort::Value>& outputTensors, int topk,
|
||||||
const std::string& camera_id);
|
const std::string& camera_id);
|
||||||
std::vector<Object> detect(const cv::Mat& image, const std::string& camera_id);
|
std::vector<Object> detect(const cv::Mat& image, const std::string& camera_id);
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -759,7 +759,7 @@ namespace ANSCENTER {
|
|||||||
std::vector<Object> ANSONNXPOSE::postprocess(
|
std::vector<Object> ANSONNXPOSE::postprocess(
|
||||||
const cv::Size& originalImageSize,
|
const cv::Size& originalImageSize,
|
||||||
const cv::Size& resizedImageShape,
|
const cv::Size& resizedImageShape,
|
||||||
const std::vector<Ort::Value>& outputTensors,
|
std::vector<Ort::Value>& outputTensors,
|
||||||
const std::string& camera_id)
|
const std::string& camera_id)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
@@ -773,7 +773,9 @@ namespace ANSCENTER {
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
const float* rawOutput = outputTensors[0].GetTensorData<float>();
|
// GetTensorMutableData (not GetTensorData) on DML — const variant
|
||||||
|
// hangs on AMD after ~8 calls. Read-only despite the name.
|
||||||
|
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
|
||||||
if (!rawOutput) {
|
if (!rawOutput) {
|
||||||
this->_logger.LogError("ANSONNXPOSE::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
|
this->_logger.LogError("ANSONNXPOSE::postprocess", "rawOutput pointer is null", __FILE__, __LINE__);
|
||||||
return {};
|
return {};
|
||||||
|
|||||||
@@ -41,8 +41,10 @@ namespace ANSCENTER {
|
|||||||
void warmupModel();
|
void warmupModel();
|
||||||
bool Init(const std::string& modelPath, bool useGPU=true, int deviceId = 0);
|
bool Init(const std::string& modelPath, bool useGPU=true, int deviceId = 0);
|
||||||
cv::Mat preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
cv::Mat preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
||||||
|
// outputTensors is non-const because GetTensorMutableData() requires
|
||||||
|
// a non-const Ort::Value receiver — see ANSONNXCL.h for full note.
|
||||||
std::vector<Object> postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
std::vector<Object> postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
||||||
const std::vector<Ort::Value>& outputTensors, const std::string& camera_id);
|
std::vector<Ort::Value>& outputTensors, const std::string& camera_id);
|
||||||
std::vector<Object> detect(const cv::Mat& image, const std::string& camera_id);
|
std::vector<Object> detect(const cv::Mat& image, const std::string& camera_id);
|
||||||
private:
|
private:
|
||||||
static std::atomic<int> instanceCounter_; // Thread-safe counter
|
static std::atomic<int> instanceCounter_; // Thread-safe counter
|
||||||
|
|||||||
@@ -726,7 +726,7 @@ namespace ANSCENTER {
|
|||||||
std::vector<Object> ANSONNXSEG::postprocess(
|
std::vector<Object> ANSONNXSEG::postprocess(
|
||||||
const cv::Size& origSize,
|
const cv::Size& origSize,
|
||||||
const cv::Size& letterboxSize,
|
const cv::Size& letterboxSize,
|
||||||
const std::vector<Ort::Value>& outputs,
|
std::vector<Ort::Value>& outputs,
|
||||||
const std::string& camera_id)
|
const std::string& camera_id)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
@@ -738,9 +738,11 @@ namespace ANSCENTER {
|
|||||||
std::to_string(outputs.size()));
|
std::to_string(outputs.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract output tensors
|
// Extract output tensors. GetTensorMutableData (not GetTensorData)
|
||||||
const float* detections = outputs[0].GetTensorData<float>();
|
// on DML — const variant hangs on AMD after ~8 calls. Read-only
|
||||||
const float* prototypes = outputs[1].GetTensorData<float>();
|
// despite the name.
|
||||||
|
const float* detections = outputs[0].GetTensorMutableData<float>();
|
||||||
|
const float* prototypes = outputs[1].GetTensorMutableData<float>();
|
||||||
|
|
||||||
// Get tensor shapes
|
// Get tensor shapes
|
||||||
auto detectionShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); // [1, 116, N]
|
auto detectionShape = outputs[0].GetTensorTypeAndShapeInfo().GetShape(); // [1, 116, N]
|
||||||
|
|||||||
@@ -51,8 +51,10 @@ namespace ANSCENTER {
|
|||||||
void warmupModel();
|
void warmupModel();
|
||||||
bool Init(const std::string& modelPath, bool useGPU=true, int deviceId = 0);
|
bool Init(const std::string& modelPath, bool useGPU=true, int deviceId = 0);
|
||||||
cv::Mat preprocess(const cv::Mat& image,float*& blobPtr,std::vector<int64_t>& inputTensorShape);
|
cv::Mat preprocess(const cv::Mat& image,float*& blobPtr,std::vector<int64_t>& inputTensorShape);
|
||||||
|
// outputs is non-const because GetTensorMutableData() requires a
|
||||||
|
// non-const Ort::Value receiver — see ANSONNXCL.h for full note.
|
||||||
std::vector<Object> postprocess(const cv::Size& origSize,const cv::Size& letterboxSize,
|
std::vector<Object> postprocess(const cv::Size& origSize,const cv::Size& letterboxSize,
|
||||||
const std::vector<Ort::Value>& outputs, const std::string& camera_id);
|
std::vector<Ort::Value>& outputs, const std::string& camera_id);
|
||||||
std::vector<Object> segment(const cv::Mat& image, const std::string& camera_id);
|
std::vector<Object> segment(const cv::Mat& image, const std::string& camera_id);
|
||||||
std::vector<cv::Point2f> maskToPolygon(const cv::Mat& binaryMask,
|
std::vector<cv::Point2f> maskToPolygon(const cv::Mat& binaryMask,
|
||||||
const cv::Rect& boundingBox,
|
const cv::Rect& boundingBox,
|
||||||
|
|||||||
@@ -518,14 +518,16 @@ namespace ANSCENTER {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::vector<Object> YOLO12OD::postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
std::vector<Object> YOLO12OD::postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
||||||
const std::vector<Ort::Value>& outputTensors,
|
std::vector<Ort::Value>& outputTensors,
|
||||||
float confThreshold, float iouThreshold)
|
float confThreshold, float iouThreshold)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||||
try {
|
try {
|
||||||
|
|
||||||
std::vector<Object> detections;
|
std::vector<Object> detections;
|
||||||
const float* rawOutput = outputTensors[0].GetTensorData<float>(); // Extract raw output data from the first output tensor
|
// GetTensorMutableData (not GetTensorData) on DML — const variant
|
||||||
|
// hangs on AMD after ~8 calls. Read-only despite the name.
|
||||||
|
const float* rawOutput = outputTensors[0].GetTensorMutableData<float>();
|
||||||
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
const std::vector<int64_t> outputShape = outputTensors[0].GetTensorTypeAndShapeInfo().GetShape();
|
||||||
|
|
||||||
// Determine the number of features and detections
|
// Determine the number of features and detections
|
||||||
|
|||||||
@@ -49,8 +49,11 @@ namespace ANSCENTER {
|
|||||||
std::vector<Object> detect(const cv::Mat& image, float confThreshold = 0.4f, float iouThreshold = 0.45f);
|
std::vector<Object> detect(const cv::Mat& image, float confThreshold = 0.4f, float iouThreshold = 0.45f);
|
||||||
//cv::Mat preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
//cv::Mat preprocess(const cv::Mat& image, float*& blob, std::vector<int64_t>& inputTensorShape);
|
||||||
cv::Mat preprocess(const cv::Mat& image, std::vector<float>& blob, std::vector<int64_t>& inputTensorShape);
|
cv::Mat preprocess(const cv::Mat& image, std::vector<float>& blob, std::vector<int64_t>& inputTensorShape);
|
||||||
|
// outputTensors is non-const because GetTensorMutableData()
|
||||||
|
// requires a non-const Ort::Value receiver — see ANSONNXCL.h
|
||||||
|
// for full note.
|
||||||
std::vector<Object> postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
std::vector<Object> postprocess(const cv::Size& originalImageSize, const cv::Size& resizedImageShape,
|
||||||
const std::vector<Ort::Value>& outputTensors,
|
std::vector<Ort::Value>& outputTensors,
|
||||||
float confThreshold, float iouThreshold);
|
float confThreshold, float iouThreshold);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -1867,12 +1867,13 @@ int main()
|
|||||||
//YOLO26POSEYolo11Test();
|
//YOLO26POSEYolo11Test();
|
||||||
//YOLO26CLYolo11Test();
|
//YOLO26CLYolo11Test();
|
||||||
//YOLO26ODYolo12Test();
|
//YOLO26ODYolo12Test();
|
||||||
YOLO26ODYolo11Test();
|
//YOLO26ODYolo11Test();
|
||||||
//YOLO26ODYolo10Test();
|
//YOLO26ODYolo10Test();
|
||||||
//YOLO26OBBYolo11Test();
|
//YOLO26OBBYolo11Test();
|
||||||
//SAM3ONNX_ImageTest(); // ORT reference — runs first, prints decoder input stats
|
//SAM3ONNX_ImageTest(); // ORT reference — runs first, prints decoder input stats
|
||||||
//SAM3TRT_ImageTest(); // TRT under test — compare decoder input stats with above
|
//SAM3TRT_ImageTest(); // TRT under test — compare decoder input stats with above
|
||||||
//CustomModel_StressTest_FilePlayer(); // Multi-task stress test (LabVIEW flow)
|
//CustomModel_StressTest_FilePlayer(); // Multi-task stress test (LabVIEW flow)
|
||||||
|
CustomModel_SingleStream_FilePlayer(); // 1 cam + 1 task — isolates concurrency from per-instance bugs
|
||||||
//SAM3TRT_UnitTest(); // TensorRT SAM3 test (in ANSSAM3-UnitTest.cpp)
|
//SAM3TRT_UnitTest(); // TensorRT SAM3 test (in ANSSAM3-UnitTest.cpp)
|
||||||
//TensorRT10Test();
|
//TensorRT10Test();
|
||||||
//FireNSmokeCustomDetection();
|
//FireNSmokeCustomDetection();
|
||||||
|
|||||||
@@ -117,3 +117,4 @@ int TestYOLOV12();
|
|||||||
int PPETest();
|
int PPETest();
|
||||||
int RVATest();
|
int RVATest();
|
||||||
int CustomModel_StressTest_FilePlayer();
|
int CustomModel_StressTest_FilePlayer();
|
||||||
|
int CustomModel_SingleStream_FilePlayer(); // 1 camera + 1 task — isolates concurrency from per-instance bugs
|
||||||
@@ -292,15 +292,15 @@ int CustomModel_StressTest_FilePlayer() {
|
|||||||
|
|
||||||
// Video files (one per stream)
|
// Video files (one per stream)
|
||||||
const std::string videoFiles[NUM_STREAMS] = {
|
const std::string videoFiles[NUM_STREAMS] = {
|
||||||
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM1.mp4",
|
"C:\\ProgramData\\ANSCENTER\\Shared\\HM1.mp4",
|
||||||
"E:\\Programs\\DemoAssets\\Videos\\Helmet\\HM2.mp4",
|
"C:\\ProgramData\\ANSCENTER\\Shared\\HM2.mp4",
|
||||||
};
|
};
|
||||||
|
|
||||||
// Which stream each task uses
|
// Which stream each task uses
|
||||||
const int taskStreamMap[NUM_TASKS] = { 0, 0, 1, 1 };
|
const int taskStreamMap[NUM_TASKS] = { 0, 0, 1, 1 };
|
||||||
|
|
||||||
// Model config — EDIT for your custom model
|
// Model config — EDIT for your custom model
|
||||||
const std::string modelFolder = "C:\\Projects\\ANSVIS\\Models\\ANS_Helmet_v2.0.zip";
|
const std::string modelFolder = "C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\Models\\B-IN_ANS_Helmet_v2.0_102728911.zip";
|
||||||
//const char* modelName = "detector";
|
//const char* modelName = "detector";
|
||||||
//const char* className = "detector.names";
|
//const char* className = "detector.names";
|
||||||
const int modelType = 16; // 16 = CustomDetector, 31 = RTYOLO, 30 = ONNXYOLO
|
const int modelType = 16; // 16 = CustomDetector, 31 = RTYOLO, 30 = ONNXYOLO
|
||||||
@@ -550,3 +550,237 @@ int CustomModel_StressTest_FilePlayer() {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// CustomModel_SingleStream_FilePlayer
|
||||||
|
//
|
||||||
|
// ISOLATION TEST — 1 camera, 1 OD handle, 1 worker thread. No concurrent
|
||||||
|
// inference whatsoever. Same flow as CustomModel_StressTest_FilePlayer
|
||||||
|
// (FilePlayer → CloneImage → RunInferenceComplete_CPP → ReleaseImage), just
|
||||||
|
// without the multi-stream / multi-task fan-out.
|
||||||
|
//
|
||||||
|
// Use this to determine whether a hang is per-instance (will still hang here)
|
||||||
|
// or only triggered by cross-session DML contention (will NOT hang here).
|
||||||
|
// If THIS test runs cleanly for an extended period but the multi-stream
|
||||||
|
// stress test hangs after a few inferences, the issue is concurrent DML
|
||||||
|
// submissions on the AMD iGPU — not a bug in the engine code itself.
|
||||||
|
//
|
||||||
|
// Reuses helpers from CustomModel_StressTest_FilePlayer:
|
||||||
|
// LoadANSCV / UnloadANSCV, ODWorkerThread, GetPerGpuFreeMiB.
|
||||||
|
// =============================================================================
|
||||||
|
int CustomModel_SingleStream_FilePlayer() {
|
||||||
|
printf("\n");
|
||||||
|
printf("============================================================\n");
|
||||||
|
printf(" Custom Model SINGLE-STREAM Isolation Test (FilePlayer)\n");
|
||||||
|
printf(" 1 camera + 1 model + 1 worker thread\n");
|
||||||
|
printf(" Press ESC to stop\n");
|
||||||
|
printf("============================================================\n\n");
|
||||||
|
|
||||||
|
// --- Load ANSCV.dll at runtime (same helper as stress test) ---
|
||||||
|
if (!LoadANSCV()) return -1;
|
||||||
|
if (pInitCameraNetwork) pInitCameraNetwork();
|
||||||
|
|
||||||
|
// =====================================================================
|
||||||
|
// CONFIGURATION — EDIT THESE FOR YOUR TEST
|
||||||
|
// =====================================================================
|
||||||
|
const std::string videoFile =
|
||||||
|
"C:\\ProgramData\\ANSCENTER\\Shared\\HM1.mp4";
|
||||||
|
|
||||||
|
const std::string modelFolder =
|
||||||
|
"C:\\ProgramData\\ANSCENTER\\ANSVIS Server\\Models\\B-IN_ANS_Helmet_v2.0_102728911.zip";
|
||||||
|
const int modelType = 16; // 16 = CustomDetector (same as stress test)
|
||||||
|
const int detectorType = 1; // Detection
|
||||||
|
const float scoreThresh = 0.5f;
|
||||||
|
const float confThresh = 0.5f;
|
||||||
|
const float nmsThresh = 0.45f;
|
||||||
|
// =====================================================================
|
||||||
|
|
||||||
|
// Reset shared run flag (it's a static at file scope shared with stress test)
|
||||||
|
g_stressRunning.store(true);
|
||||||
|
|
||||||
|
std::cout << "\n--- Single-stream isolation test (no concurrency) ---\n" << std::endl;
|
||||||
|
// NOTE: deliberately NOT calling OptimizeModelStr here. OptimizeModelStr
|
||||||
|
// creates a separate "warmup" ANSCUSTOM instance whose detector and
|
||||||
|
// classifier sessions stay loaded for the lifetime of the process — even
|
||||||
|
// though that instance never runs inference, its 2 DML sessions hold AMD
|
||||||
|
// GPU resources and were suspected of contributing to a hang in the
|
||||||
|
// active session's GetTensorData<float>. Skipping it here leaves exactly
|
||||||
|
// 1 ANSCUSTOM = 2 DML sessions (detector + classifier) in the process,
|
||||||
|
// for the cleanest possible single-session isolation.
|
||||||
|
(void)detectorType; // unused without the OptimizeModelStr call
|
||||||
|
|
||||||
|
// --- Per-task state (just one) ---
|
||||||
|
StressTaskState taskState;
|
||||||
|
|
||||||
|
// --- Create FilePlayer (single stream) ---
|
||||||
|
void* fpClient = nullptr;
|
||||||
|
{
|
||||||
|
printf("[Stream0] Creating FilePlayer: %s\n", videoFile.c_str());
|
||||||
|
int result = pCreateFilePlayer(&fpClient, "", videoFile.c_str());
|
||||||
|
if (result != 1 || !fpClient) {
|
||||||
|
printf("[Stream0] FAILED to create FilePlayer (result=%d)\n", result);
|
||||||
|
UnloadANSCV();
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
if (pSetFilePlayerDisplayRes) {
|
||||||
|
pSetFilePlayerDisplayRes(&fpClient, 1920, 1080);
|
||||||
|
}
|
||||||
|
printf("[Stream0] FilePlayer created (display: 1920x1080)\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Create OD handle (single instance) ---
|
||||||
|
ANSCENTER::ANSODBase* odHandle = nullptr;
|
||||||
|
{
|
||||||
|
printf("[Task0] Creating OD handle (modelType=%d)...\n", modelType);
|
||||||
|
auto loadStart = std::chrono::steady_clock::now();
|
||||||
|
auto vramBefore = GetPerGpuFreeMiB();
|
||||||
|
|
||||||
|
std::string labelMap = CreateANSODHandle(
|
||||||
|
&odHandle,
|
||||||
|
"", // licenseKey
|
||||||
|
modelFolder.c_str(), // modelFilePath (zip or folder)
|
||||||
|
"", // modelZipFilePassword
|
||||||
|
scoreThresh,
|
||||||
|
confThresh,
|
||||||
|
nmsThresh,
|
||||||
|
1, // autoDetectEngine
|
||||||
|
modelType,
|
||||||
|
1, // detectionType (1 = Detection)
|
||||||
|
1); // loadEngineOnCreation
|
||||||
|
|
||||||
|
auto loadEnd = std::chrono::steady_clock::now();
|
||||||
|
double loadMs = std::chrono::duration<double, std::milli>(loadEnd - loadStart).count();
|
||||||
|
|
||||||
|
if (!odHandle) {
|
||||||
|
printf("[Task0] FAILED to create OD handle\n");
|
||||||
|
pStopFilePlayer(&fpClient);
|
||||||
|
pReleaseFilePlayer(&fpClient);
|
||||||
|
UnloadANSCV();
|
||||||
|
return -3;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto vramAfter = GetPerGpuFreeMiB();
|
||||||
|
int bestGpu = 0;
|
||||||
|
size_t maxDelta = 0;
|
||||||
|
for (size_t g = 0; g < vramBefore.size() && g < vramAfter.size(); g++) {
|
||||||
|
size_t delta = (vramBefore[g] > vramAfter[g]) ? vramBefore[g] - vramAfter[g] : 0;
|
||||||
|
if (delta > maxDelta) { maxDelta = delta; bestGpu = (int)g; }
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("[Task0] Model loaded in %.0f ms | GPU[%d] | VRAM: %zu MiB | Labels: %s\n",
|
||||||
|
loadMs, bestGpu, maxDelta,
|
||||||
|
labelMap.empty() ? "(none)" : labelMap.substr(0, 80).c_str());
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lk(taskState.mtx);
|
||||||
|
taskState.engineLoaded = true;
|
||||||
|
taskState.statusMsg = "Running";
|
||||||
|
taskState.gpuDeviceId = bestGpu;
|
||||||
|
taskState.vramUsedMiB = maxDelta;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Start playback ---
|
||||||
|
pStartFilePlayer(&fpClient);
|
||||||
|
printf("[Stream0] Playback started\n");
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||||
|
|
||||||
|
// --- Single worker thread (reuse ODWorkerThread from stress test) ---
|
||||||
|
std::thread worker(ODWorkerThread, /*taskId=*/0, fpClient, odHandle, std::ref(taskState));
|
||||||
|
|
||||||
|
// --- Display loop (single cell) ---
|
||||||
|
const int cellW = 1280, cellH = 720;
|
||||||
|
const char* windowName = "Custom Model — Single Stream Isolation";
|
||||||
|
cv::namedWindow(windowName, cv::WINDOW_NORMAL);
|
||||||
|
cv::resizeWindow(windowName, cellW, cellH + 40);
|
||||||
|
|
||||||
|
auto testStart = std::chrono::steady_clock::now();
|
||||||
|
|
||||||
|
while (g_stressRunning.load()) {
|
||||||
|
cv::Mat canvas(cellH + 40, cellW, CV_8UC3, cv::Scalar(30, 30, 30));
|
||||||
|
|
||||||
|
cv::Mat cell;
|
||||||
|
double fps = 0, infMs = 0, grabMs = 0;
|
||||||
|
int fCount = 0, dCount = 0, gpuId = -1;
|
||||||
|
std::string statusMsg, lastDet;
|
||||||
|
bool engineLoaded = false;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lk(taskState.mtx);
|
||||||
|
if (!taskState.displayFrame.empty()) {
|
||||||
|
cv::resize(taskState.displayFrame, cell, cv::Size(cellW, cellH));
|
||||||
|
}
|
||||||
|
fps = taskState.fps;
|
||||||
|
infMs = taskState.inferenceMs;
|
||||||
|
grabMs = taskState.grabMs;
|
||||||
|
fCount = taskState.frameCount;
|
||||||
|
dCount = taskState.detectionCount;
|
||||||
|
gpuId = taskState.gpuDeviceId;
|
||||||
|
statusMsg = taskState.statusMsg;
|
||||||
|
lastDet = taskState.lastDetection;
|
||||||
|
engineLoaded = taskState.engineLoaded;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cell.empty()) {
|
||||||
|
cell = cv::Mat(cellH, cellW, CV_8UC3, cv::Scalar(40, 40, 40));
|
||||||
|
cv::putText(cell, "Task 0: " + statusMsg,
|
||||||
|
cv::Point(20, cellH / 2),
|
||||||
|
cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(100, 100, 255), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::rectangle(cell, cv::Rect(0, cellH - 45, cellW, 45),
|
||||||
|
cv::Scalar(0, 0, 0), cv::FILLED);
|
||||||
|
char bar1[256], bar2[128];
|
||||||
|
snprintf(bar1, sizeof(bar1),
|
||||||
|
"%.1f FPS | inf:%.0fms grab:%.0fms | Frames:%d | Det:%d",
|
||||||
|
fps, infMs, grabMs, fCount, dCount);
|
||||||
|
snprintf(bar2, sizeof(bar2), "GPU[%d] | last:%s",
|
||||||
|
gpuId, lastDet.empty() ? "-" : lastDet.c_str());
|
||||||
|
cv::Scalar barColor = engineLoaded ? cv::Scalar(0, 255, 0) : cv::Scalar(0, 100, 255);
|
||||||
|
cv::putText(cell, bar1, cv::Point(5, cellH - 25),
|
||||||
|
cv::FONT_HERSHEY_SIMPLEX, 0.5, barColor, 1);
|
||||||
|
cv::putText(cell, bar2, cv::Point(5, cellH - 5),
|
||||||
|
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 200, 255), 1);
|
||||||
|
|
||||||
|
cell.copyTo(canvas(cv::Rect(0, 0, cellW, cellH)));
|
||||||
|
|
||||||
|
double elapsed = std::chrono::duration<double>(
|
||||||
|
std::chrono::steady_clock::now() - testStart).count();
|
||||||
|
char bottomBar[256];
|
||||||
|
snprintf(bottomBar, sizeof(bottomBar),
|
||||||
|
"Single-stream | Elapsed: %.0fs | %.1f FPS | Press ESC to stop",
|
||||||
|
elapsed, fps);
|
||||||
|
cv::putText(canvas, bottomBar, cv::Point(10, cellH + 25),
|
||||||
|
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(200, 200, 200), 1);
|
||||||
|
|
||||||
|
cv::imshow(windowName, canvas);
|
||||||
|
int key = cv::waitKey(30);
|
||||||
|
if (key == 27) {
|
||||||
|
printf("\nESC pressed - stopping...\n");
|
||||||
|
g_stressRunning.store(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Waiting for worker thread...\n");
|
||||||
|
if (worker.joinable()) worker.join();
|
||||||
|
|
||||||
|
double totalElapsed = std::chrono::duration<double>(
|
||||||
|
std::chrono::steady_clock::now() - testStart).count();
|
||||||
|
printf("\n============================================================\n");
|
||||||
|
printf(" SINGLE-STREAM SUMMARY (runtime: %.0fs)\n", totalElapsed);
|
||||||
|
printf("============================================================\n");
|
||||||
|
printf(" GPU[%d] | %d frames | %d detections | %.1f FPS | Inf: %.0fms\n",
|
||||||
|
taskState.gpuDeviceId, taskState.frameCount, taskState.detectionCount,
|
||||||
|
taskState.fps, taskState.inferenceMs);
|
||||||
|
printf("============================================================\n");
|
||||||
|
|
||||||
|
if (odHandle) ReleaseANSODHandle(&odHandle);
|
||||||
|
if (fpClient) {
|
||||||
|
pStopFilePlayer(&fpClient);
|
||||||
|
pReleaseFilePlayer(&fpClient);
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::destroyAllWindows();
|
||||||
|
if (pDeinitCameraNetwork) pDeinitCameraNetwork();
|
||||||
|
UnloadANSCV();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user