Fix Concurrent extract + open race on train_last.onnx

This commit is contained in:
2026-04-24 11:29:28 +10:00
parent 91bdb3f96b
commit baa88bcc48
4 changed files with 206 additions and 8 deletions

View File

@@ -7,13 +7,24 @@
// Per-path mutex to serialize concurrent zip operations on the same target.
// Without this, two LabVIEW threads can race: one extracting a zip while
// another truncates/writes the same file, corrupting data and crashing LabVIEW.
// Also used to serialize extract ↔ ONNX session creation on the same extracted
// model folder — without that, thread A can finish extraction and begin opening
// train_last.onnx while thread B re-enters extraction and truncates the file,
// producing "system error number 13" (EACCES) on the first reader.
// Held by GetZipPathLock while it looks up or inserts into g_zipPathLocks.
static std::mutex g_zipPathMapMutex;
// Registry of per-path locks. The key is the path string exactly as the caller
// passed it (no normalization is applied here), so two different spellings of
// the same location map to two independent locks.
static std::map<std::string, std::shared_ptr<std::mutex>> g_zipPathLocks;
static std::map<std::string, std::shared_ptr<std::timed_mutex>> g_zipPathLocks;
// Returns the lock registered for `path`, creating it on the first request.
// The registry lookup/insert runs under g_zipPathMapMutex. The returned
// per-path lock is NOT acquired here; the caller is responsible for locking it.
// Handing out a shared_ptr keeps the mutex alive for as long as any caller
// still holds a reference to it.
static std::shared_ptr<std::mutex> GetZipPathLock(const std::string& path) {
static std::shared_ptr<std::timed_mutex> GetZipPathLock(const std::string& path) {
std::lock_guard<std::mutex> guard(g_zipPathMapMutex);
// operator[] default-inserts an empty shared_ptr when `path` is a new key.
auto& ptr = g_zipPathLocks[path];
if (!ptr) ptr = std::make_shared<std::mutex>();
if (!ptr) ptr = std::make_shared<std::timed_mutex>();
return ptr;
}
// Exported accessor for the per-path lock of an extracted model folder.
// It forwards to GetZipPathLock, so a caller asking for `folderPath` receives
// the same mutex object that any other GetZipPathLock user gets for that exact
// path string. The lock is returned unlocked; the folder and the mutex address
// are written to the debug log to help correlate waiters.
std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath) {
    std::shared_ptr<std::timed_mutex> folderLock = GetZipPathLock(folderPath);
    ANS_DBG("ModelLock", "GetModelFolderLock: folder=%s mutex=%p",
        folderPath.c_str(), static_cast<void*>(folderLock.get()));
    return folderLock;
}
@@ -453,7 +464,7 @@ bool AddFolderContentsToZip(zip* archive, const char* folderPath, const char* zi
bool ZipFolderWithPassword(const char* folderPath, const char* zipFilePath, const char* password) {
auto pathLock = GetZipPathLock(std::string(zipFilePath));
std::lock_guard<std::mutex> zipGuard(*pathLock);
std::lock_guard<std::timed_mutex> zipGuard(*pathLock);
zip* zipArchive;
zip_flags_t flags = ZIP_CREATE | ZIP_TRUNCATE;
@@ -839,10 +850,64 @@ std::string GetDateTimeString(const std::string& format) {
bool ExtractProtectedZipFile(const std::string& zipFileName, const std::string& password, const std::string& modelName, const std::string outputFolder)
{
auto pathLock = GetZipPathLock(outputFolder);
std::lock_guard<std::mutex> zipGuard(*pathLock);
std::lock_guard<std::timed_mutex> zipGuard(*pathLock);
int error;
if (!FileExist(zipFileName))return false;
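// Idempotent fast-path: if the target folder already looks freshly extracted
// (it holds at least one regular file, and every top-level regular file is
// non-empty with an mtime >= the zip's mtime), skip re-extraction. This is a
// heuristic: it runs before the archive is opened, so it does not compare the
// folder against the zip's entry list, and it does not descend into
// subdirectories. It prevents redundant passes from concurrent
// CreateANSODHandle calls from truncating files that another thread is
// already mmap'ing via ORT (which surfaces as EACCES / system error 13).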
ANS_DBG("Extract", "ExtractProtectedZipFile: zip=%s -> folder=%s",
zipFileName.c_str(), outputFolder.c_str());
try {
if (std::filesystem::exists(outputFolder) &&
std::filesystem::is_directory(outputFolder))
{
const auto zipTime = std::filesystem::last_write_time(zipFileName);
bool anyFile = false;
bool allFresh = true;
size_t numFiles = 0;
std::string staleFile;
for (const auto& e : std::filesystem::directory_iterator(outputFolder)) {
if (!e.is_regular_file()) continue;
anyFile = true;
++numFiles;
std::error_code ec;
const auto sz = e.file_size(ec);
if (ec || sz == 0) {
allFresh = false;
staleFile = e.path().filename().string() + " (zero/err)";
break;
}
const auto ft = std::filesystem::last_write_time(e.path(), ec);
if (ec || ft < zipTime) {
allFresh = false;
staleFile = e.path().filename().string() + " (older than zip)";
break;
}
}
if (anyFile && allFresh) {
ANS_DBG("Extract", "ExtractProtectedZipFile: SKIP re-extract — %zu file(s) already fresh in %s",
numFiles, outputFolder.c_str());
return true; // already extracted and up-to-date
}
ANS_DBG("Extract",
"ExtractProtectedZipFile: full extract needed — anyFile=%d stale=%s",
anyFile ? 1 : 0,
staleFile.empty() ? "(empty folder)" : staleFile.c_str());
} else {
ANS_DBG("Extract", "ExtractProtectedZipFile: folder absent, full extract");
}
}
catch (const std::exception& ex) {
// Any filesystem hiccup: fall through to full extraction.
ANS_DBG("Extract", "ExtractProtectedZipFile: freshness check threw, extracting: %s",
ex.what());
}
zip_t* archive = zip_open(zipFileName.c_str(), ZIP_RDONLY, &error);
if (!archive) {
std::cerr << "Error opening ZIP archive: " << zip_strerror(archive) << std::endl;

View File

@@ -16,6 +16,8 @@
#include <vector>
#include <regex>
#include <stdio.h>
#include <memory>
#include <mutex>
//namespace logging = boost::log;
//namespace attrs = boost::log::attributes;
@@ -89,4 +91,12 @@ namespace fs = std::filesystem;
// For training engine
//bool ExtractPasswordProtectedZipForTrainingEgnine(const std::string& zipFileName, const std::string& password, const std::string& modelName, std::string& outputFolder, bool edgeDeviceModel = true);
// Extracts the password-protected archive `zipFileName` into `outputFolder`.
// Returns false when `zipFileName` does not exist. Calls that target the same
// `outputFolder` string are serialized on a per-path lock, and extraction is
// skipped (returning true) when the folder already contains regular files that
// are all non-empty and no older than the zip.
// NOTE(review): the role of `modelName` is not visible from here; confirm
// against the definition before relying on it.
ANSLICENSE_API bool ExtractProtectedZipFile(const std::string& zipFileName,const std::string& password,const std::string& modelName,const std::string outputFolder);
// Per-path mutex for a model folder. Used to serialize extract ↔ session
// creation on the same extracted folder so concurrent CreateANSODHandle calls
// cannot truncate/rewrite a model file while another thread is loading it.
// Keyed by folder path (not zip path) so both extractor and consumer agree.
// Returns a shared_ptr to a std::timed_mutex (rather than a plain std::mutex)
// so callers can use try_lock_for / try_lock_until to bound their wait and
// avoid a hang if a peer thread deadlocks inside extraction or ORT session
// creation.
ANSLICENSE_API std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath);
#endif