diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 17f3181..9f1a871 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -95,7 +95,13 @@
"Bash(git add *)",
"Read(//c/ProgramData/Jh7O7nUe7vS/Models/EngineModels/B-IN_ANS_VehicleDetection_v2.0_67345015/**)",
"Bash(xxd)",
- "Bash(icacls \"C:\\\\ProgramData\\\\Jh7O7nUe7vS\\\\Models\\\\EngineModels\\\\B-IN_ANS_VehicleDetection_v2.0_67345015\\\\train_last.onnx\")"
+ "Bash(icacls \"C:\\\\ProgramData\\\\Jh7O7nUe7vS\\\\Models\\\\EngineModels\\\\B-IN_ANS_VehicleDetection_v2.0_67345015\\\\train_last.onnx\")",
+ "Bash(grep -oE ']*>[^<]{0,400}' \"C:/Users/nghia/Downloads/error.xml\")",
+ "Bash(grep -oE ']*>[^<]{0,500}' \"/c/Users/nghia/Downloads/error.xml\")",
+ "Read(//tmp/**)",
+ "Bash(grep -oE ']*>[^<]{0,400}' \"/c/Users/nghia/Downloads/error.xml\")",
+ "Bash(echo \"Exit: $?\")",
+ "Bash(python -)"
]
}
}
diff --git a/core/ANSLicensingSystem/Utility.cpp b/core/ANSLicensingSystem/Utility.cpp
index 5ee3fc6..2614a2c 100644
--- a/core/ANSLicensingSystem/Utility.cpp
+++ b/core/ANSLicensingSystem/Utility.cpp
@@ -7,13 +7,24 @@
// Per-path mutex to serialize concurrent zip operations on the same target.
// Without this, two LabVIEW threads can race: one extracting a zip while
// another truncates/writes the same file, corrupting data and crashing LabVIEW.
+// Also used to serialize extract ↔ ONNX session creation on the same extracted
+// model folder — without that, thread A can finish extraction and begin opening
+// train_last.onnx while thread B re-enters extraction and truncates the file,
+// producing "system error number 13" (EACCES) on the first reader.
static std::mutex g_zipPathMapMutex;
-static std::map<std::string, std::shared_ptr<std::mutex>> g_zipPathLocks;
+static std::map<std::string, std::shared_ptr<std::timed_mutex>> g_zipPathLocks;
-static std::shared_ptr<std::mutex> GetZipPathLock(const std::string& path) {
+static std::shared_ptr<std::timed_mutex> GetZipPathLock(const std::string& path) {
std::lock_guard guard(g_zipPathMapMutex);
auto& ptr = g_zipPathLocks[path];
- if (!ptr) ptr = std::make_shared<std::mutex>();
+ if (!ptr) ptr = std::make_shared<std::timed_mutex>();
+ return ptr;
+}
+
+std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath) {
+ auto ptr = GetZipPathLock(folderPath);
+ ANS_DBG("ModelLock", "GetModelFolderLock: folder=%s mutex=%p",
+ folderPath.c_str(), (void*)ptr.get());
return ptr;
}
@@ -453,7 +464,7 @@ bool AddFolderContentsToZip(zip* archive, const char* folderPath, const char* zi
bool ZipFolderWithPassword(const char* folderPath, const char* zipFilePath, const char* password) {
auto pathLock = GetZipPathLock(std::string(zipFilePath));
- std::lock_guard<std::mutex> zipGuard(*pathLock);
+ std::lock_guard<std::timed_mutex> zipGuard(*pathLock);
zip* zipArchive;
zip_flags_t flags = ZIP_CREATE | ZIP_TRUNCATE;
@@ -839,10 +850,64 @@ std::string GetDateTimeString(const std::string& format) {
bool ExtractProtectedZipFile(const std::string& zipFileName, const std::string& password, const std::string& modelName, const std::string outputFolder)
{
auto pathLock = GetZipPathLock(outputFolder);
- std::lock_guard<std::mutex> zipGuard(*pathLock);
+ std::lock_guard<std::timed_mutex> zipGuard(*pathLock);
int error;
if (!FileExist(zipFileName))return false;
+
+ // Idempotent fast-path: if the target folder already looks freshly extracted
+ // (heuristic: >= 1 top-level regular file, none empty, none older than the
+ // zip's mtime), skip re-extraction. Entries are NOT compared against the zip.
+ // This keeps redundant passes from concurrent CreateANSODHandle calls from
+ // truncating files another thread is mmap'ing via ORT (EACCES / error 13).
+ ANS_DBG("Extract", "ExtractProtectedZipFile: zip=%s -> folder=%s",
+ zipFileName.c_str(), outputFolder.c_str());
+ try {
+ if (std::filesystem::exists(outputFolder) &&
+ std::filesystem::is_directory(outputFolder))
+ {
+ const auto zipTime = std::filesystem::last_write_time(zipFileName);
+ bool anyFile = false;
+ bool allFresh = true;
+ size_t numFiles = 0;
+ std::string staleFile;
+ for (const auto& e : std::filesystem::directory_iterator(outputFolder)) {
+ if (!e.is_regular_file()) continue;
+ anyFile = true;
+ ++numFiles;
+ std::error_code ec;
+ const auto sz = e.file_size(ec);
+ if (ec || sz == 0) {
+ allFresh = false;
+ staleFile = e.path().filename().string() + " (zero/err)";
+ break;
+ }
+ const auto ft = std::filesystem::last_write_time(e.path(), ec);
+ if (ec || ft < zipTime) {
+ allFresh = false;
+ staleFile = e.path().filename().string() + " (older than zip)";
+ break;
+ }
+ }
+ if (anyFile && allFresh) {
+ ANS_DBG("Extract", "ExtractProtectedZipFile: SKIP re-extract — %zu file(s) already fresh in %s",
+ numFiles, outputFolder.c_str());
+ return true; // already extracted and up-to-date
+ }
+ ANS_DBG("Extract",
+ "ExtractProtectedZipFile: full extract needed — anyFile=%d stale=%s",
+ anyFile ? 1 : 0,
+ staleFile.empty() ? "(empty folder)" : staleFile.c_str());
+ } else {
+ ANS_DBG("Extract", "ExtractProtectedZipFile: folder absent, full extract");
+ }
+ }
+ catch (const std::exception& ex) {
+ // Any filesystem hiccup: fall through to full extraction.
+ ANS_DBG("Extract", "ExtractProtectedZipFile: freshness check threw, extracting: %s",
+ ex.what());
+ }
+
zip_t* archive = zip_open(zipFileName.c_str(), ZIP_RDONLY, &error);
if (!archive) {
std::cerr << "Error opening ZIP archive: " << zip_strerror(archive) << std::endl;
diff --git a/core/ANSLicensingSystem/Utility.h b/core/ANSLicensingSystem/Utility.h
index dcb8b24..4c78cd2 100644
--- a/core/ANSLicensingSystem/Utility.h
+++ b/core/ANSLicensingSystem/Utility.h
@@ -16,6 +16,8 @@
#include
#include
#include
+#include <memory>
+#include <mutex>
//namespace logging = boost::log;
//namespace attrs = boost::log::attributes;
@@ -89,4 +91,12 @@ namespace fs = std::filesystem;
// For training engine
//bool ExtractPasswordProtectedZipForTrainingEgnine(const std::string& zipFileName, const std::string& password, const std::string& modelName, std::string& outputFolder, bool edgeDeviceModel = true);
ANSLICENSE_API bool ExtractProtectedZipFile(const std::string& zipFileName,const std::string& password,const std::string& modelName,const std::string outputFolder);
+
+ // Per-path mutex for a model folder. Used to serialize extract ↔ session
+ // creation on the same extracted folder so concurrent CreateANSODHandle calls
+ // cannot truncate/rewrite a model file while another thread is loading it.
+ // Keyed by folder path (not zip path) so both extractor and consumer agree.
+ // Returns std::timed_mutex so callers can bound their wait and avoid a hang
+ // if a peer thread deadlocks inside extraction or ORT session creation.
+ ANSLICENSE_API std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath);
#endif
\ No newline at end of file
diff --git a/modules/ANSODEngine/ANSONNXYOLO.cpp b/modules/ANSODEngine/ANSONNXYOLO.cpp
index 8cdfda6..d7ba175 100644
--- a/modules/ANSODEngine/ANSONNXYOLO.cpp
+++ b/modules/ANSODEngine/ANSONNXYOLO.cpp
@@ -1775,12 +1775,53 @@ namespace ANSCENTER {
labelMap = VectorToCommaSeparatedString(_classes);
if (this->_loadEngineOnCreation) {
+ // Hold the model-folder lock across session creation so a
+ // concurrent CreateANSODHandle on the same model cannot
+ // re-enter ExtractProtectedZipFile and truncate the .onnx
+ // file while ORT is opening it (which would surface as
+ // "system error number 13" EACCES from the ORT loader).
+ //
+ // Timed wait — 120s ceiling for extract + ORT session
+ // creation (GPU EP compile can take ~30s on large models).
+ // If we hit the timeout, the peer thread is deadlocked or
+ // wedged; fail the load instead of hanging the caller.
+ auto _folderLock = GetModelFolderLock(_modelFolder);
+ std::unique_lock<std::timed_mutex> _folderGuard(
+ *_folderLock, std::defer_lock);
+ ANS_DBG("ONNXYOLO", "Initialize: waiting on folder lock (120s): %s",
+ _modelFolder.c_str());
+ auto _lockT0 = std::chrono::steady_clock::now();
+ if (!_folderGuard.try_lock_for(std::chrono::seconds(120))) {
+ auto waitedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _lockT0).count();
+ ANS_DBG("ONNXYOLO",
+ "Initialize: TIMEOUT on folder lock after %lldms: %s",
+ (long long)waitedMs, _modelFolder.c_str());
+ _logger.LogError("ANSONNXYOLO::Initialize",
+ "Timed out waiting for model-folder lock: " + _modelFolder,
+ __FILE__, __LINE__);
+ return false;
+ }
+ auto waitedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _lockT0).count();
+ ANS_DBG("ONNXYOLO", "Initialize: folder lock acquired in %lldms, calling InitOrtEngine",
+ (long long)waitedMs);
+ auto _initT0 = std::chrono::steady_clock::now();
if (!InitOrtEngine()) {
+ auto initMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _initT0).count();
+ ANS_DBG("ONNXYOLO",
+ "Initialize: InitOrtEngine FAILED after %lldms, model=%s",
+ (long long)initMs, _modelFilePath.c_str());
_logger.LogError("ANSONNXYOLO::Initialize",
"Failed to create ONNX Runtime engine: " + _modelFilePath,
__FILE__, __LINE__);
return false;
}
+ auto initMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _initT0).count();
+ ANS_DBG("ONNXYOLO", "Initialize: InitOrtEngine OK in %lldms",
+ (long long)initMs);
}
// Fix input resolution for dynamic-shape models.
@@ -1845,7 +1886,45 @@ namespace ANSCENTER {
}
if (this->_loadEngineOnCreation) {
- if (!InitOrtEngine()) { _modelLoadValid = false; return false; }
+ // See ANSONNXYOLO::Initialize — hold folder lock so a sibling
+ // extraction cannot truncate train_last.onnx mid-load. Timed
+ // wait so a stuck peer cannot hang this thread forever.
+ auto _folderLock = GetModelFolderLock(_modelFolder);
+ std::unique_lock<std::timed_mutex> _folderGuard(
+ *_folderLock, std::defer_lock);
+ ANS_DBG("ONNXYOLO", "LoadModel: waiting on folder lock (120s): %s",
+ _modelFolder.c_str());
+ auto _lockT0 = std::chrono::steady_clock::now();
+ if (!_folderGuard.try_lock_for(std::chrono::seconds(120))) {
+ auto waitedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _lockT0).count();
+ ANS_DBG("ONNXYOLO",
+ "LoadModel: TIMEOUT on folder lock after %lldms: %s",
+ (long long)waitedMs, _modelFolder.c_str());
+ _logger.LogError("ANSONNXYOLO::LoadModel",
+ "Timed out waiting for model-folder lock: " + _modelFolder,
+ __FILE__, __LINE__);
+ _modelLoadValid = false;
+ return false;
+ }
+ auto waitedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _lockT0).count();
+ ANS_DBG("ONNXYOLO", "LoadModel: folder lock acquired in %lldms, calling InitOrtEngine",
+ (long long)waitedMs);
+ auto _initT0 = std::chrono::steady_clock::now();
+ if (!InitOrtEngine()) {
+ auto initMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _initT0).count();
+ ANS_DBG("ONNXYOLO",
+ "LoadModel: InitOrtEngine FAILED after %lldms, model=%s",
+ (long long)initMs, _modelFilePath.c_str());
+ _modelLoadValid = false;
+ return false;
+ }
+ auto initMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _initT0).count();
+ ANS_DBG("ONNXYOLO", "LoadModel: InitOrtEngine OK in %lldms",
+ (long long)initMs);
}
// Fix input resolution for dynamic-shape models (same as primary Initialize)
@@ -1920,7 +1999,45 @@ namespace ANSCENTER {
labelMap = VectorToCommaSeparatedString(_classes);
if (this->_loadEngineOnCreation) {
- if (!InitOrtEngine()) { _modelLoadValid = false; return false; }
+ // See ANSONNXYOLO::Initialize — hold folder lock so a sibling
+ // extraction cannot truncate the model file mid-load. Timed
+ // wait so a stuck peer cannot hang this thread forever.
+ auto _folderLock = GetModelFolderLock(_modelFolder);
+ std::unique_lock<std::timed_mutex> _folderGuard(
+ *_folderLock, std::defer_lock);
+ ANS_DBG("ONNXYOLO", "LoadModelFromFolder: waiting on folder lock (120s): %s",
+ _modelFolder.c_str());
+ auto _lockT0 = std::chrono::steady_clock::now();
+ if (!_folderGuard.try_lock_for(std::chrono::seconds(120))) {
+ auto waitedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _lockT0).count();
+ ANS_DBG("ONNXYOLO",
+ "LoadModelFromFolder: TIMEOUT on folder lock after %lldms: %s",
+ (long long)waitedMs, _modelFolder.c_str());
+ _logger.LogError("ANSONNXYOLO::LoadModelFromFolder",
+ "Timed out waiting for model-folder lock: " + _modelFolder,
+ __FILE__, __LINE__);
+ _modelLoadValid = false;
+ return false;
+ }
+ auto waitedMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _lockT0).count();
+ ANS_DBG("ONNXYOLO", "LoadModelFromFolder: folder lock acquired in %lldms, calling InitOrtEngine",
+ (long long)waitedMs);
+ auto _initT0 = std::chrono::steady_clock::now();
+ if (!InitOrtEngine()) {
+ auto initMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _initT0).count();
+ ANS_DBG("ONNXYOLO",
+ "LoadModelFromFolder: InitOrtEngine FAILED after %lldms, model=%s",
+ (long long)initMs, _modelFilePath.c_str());
+ _modelLoadValid = false;
+ return false;
+ }
+ auto initMs = std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - _initT0).count();
+ ANS_DBG("ONNXYOLO", "LoadModelFromFolder: InitOrtEngine OK in %lldms",
+ (long long)initMs);
}
// Fix input resolution for dynamic-shape models (same as primary Initialize)