Remove locks
This commit is contained in:
@@ -1,22 +1,25 @@
|
||||
#include "Utility.h"
|
||||
#include <ctime>
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
// Bounded wait when acquiring GetZipPathLock inside extractor/writer entry
|
||||
// points. A genuine extract is seconds, but adversarial I/O (slow NAS,
|
||||
// antivirus holding files, libzip hang on malformed archive, a crashed
|
||||
// thread that never unwound the lock) can make it block indefinitely.
|
||||
// 15 minutes is generous enough for large models on slow storage yet short
|
||||
// enough that a service-wide hang turns into a localized load-failure
|
||||
// visible in DebugView.
|
||||
// Upper bound passed to try_lock_for() when waiting on a per-path zip lock.
static constexpr std::chrono::minutes kZipPathLockTimeout{15};
|
||||
|
||||
// Per-path mutex to serialize concurrent zip operations on the same target.
|
||||
// Without this, two LabVIEW threads can race: one extracting a zip while
|
||||
// another truncates/writes the same file, corrupting data and crashing LabVIEW.
|
||||
// Also used to serialize extract ↔ ONNX session creation on the same extracted
|
||||
// model folder — without that, thread A can finish extraction and begin opening
|
||||
// train_last.onnx while thread B re-enters extraction and truncates the file,
|
||||
// producing "system error number 13" (EACCES) on the first reader.
|
||||
// Recursive so the same thread can re-acquire the lock through layered load
|
||||
// calls — ANSALPR_OD::LoadEngine -> ANSONNXYOLO::LoadModelFromFolder both
|
||||
// acquire the SAME folder lock on the SAME thread. A non-recursive
|
||||
// timed_mutex deadlocks that nesting for 120 s then fails. Recursive keeps
|
||||
// cross-thread serialization intact while allowing legitimate re-entry from
|
||||
// the lock-holding thread.
|
||||
// Scope is limited to the extractor / zip writer — once ExtractProtectedZipFile
|
||||
// returns, the sidecar check makes subsequent calls fast no-ops, and readers
|
||||
// (ORT / TRT session create) access stable files concurrently with no lock.
|
||||
namespace {

// NOTE(review): presumably guards lookups/insertions in g_zipPathLocks; the
// code that takes this mutex is not visible in this view — confirm.
std::mutex g_zipPathMapMutex;

// One shared recursive_timed_mutex per path string.
std::map<std::string, std::shared_ptr<std::recursive_timed_mutex>> g_zipPathLocks;

} // namespace
|
||||
|
||||
@@ -27,13 +30,6 @@ static std::shared_ptr<std::recursive_timed_mutex> GetZipPathLock(const std::str
|
||||
return ptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<std::recursive_timed_mutex> GetModelFolderLock(const std::string& folderPath) {
|
||||
auto ptr = GetZipPathLock(folderPath);
|
||||
ANS_DBG("ModelLock", "GetModelFolderLock: folder=%s mutex=%p",
|
||||
folderPath.c_str(), (void*)ptr.get());
|
||||
return ptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T get_data(const boost::property_tree::ptree& pt, const std::string& key)
|
||||
{
|
||||
@@ -470,7 +466,17 @@ bool AddFolderContentsToZip(zip* archive, const char* folderPath, const char* zi
|
||||
|
||||
bool ZipFolderWithPassword(const char* folderPath, const char* zipFilePath, const char* password) {
|
||||
auto pathLock = GetZipPathLock(std::string(zipFilePath));
|
||||
std::lock_guard<std::recursive_timed_mutex> zipGuard(*pathLock);
|
||||
ANS_DBG("Extract", "ZipFolderWithPassword: waiting on zip lock (%llds): %s",
|
||||
(long long)std::chrono::duration_cast<std::chrono::seconds>(kZipPathLockTimeout).count(),
|
||||
zipFilePath ? zipFilePath : "(null)");
|
||||
std::unique_lock<std::recursive_timed_mutex> zipGuard(*pathLock, std::defer_lock);
|
||||
if (!zipGuard.try_lock_for(kZipPathLockTimeout)) {
|
||||
ANS_DBG("Extract", "ZipFolderWithPassword: TIMEOUT acquiring zip lock for %s",
|
||||
zipFilePath ? zipFilePath : "(null)");
|
||||
return false;
|
||||
}
|
||||
ANS_DBG("Extract", "ZipFolderWithPassword: zip lock acquired: %s",
|
||||
zipFilePath ? zipFilePath : "(null)");
|
||||
|
||||
zip* zipArchive;
|
||||
zip_flags_t flags = ZIP_CREATE | ZIP_TRUNCATE;
|
||||
@@ -856,7 +862,16 @@ std::string GetDateTimeString(const std::string& format) {
|
||||
bool ExtractProtectedZipFile(const std::string& zipFileName, const std::string& password, const std::string& modelName, const std::string outputFolder)
|
||||
{
|
||||
auto pathLock = GetZipPathLock(outputFolder);
|
||||
std::lock_guard<std::recursive_timed_mutex> zipGuard(*pathLock);
|
||||
ANS_DBG("Extract", "ExtractProtectedZipFile: waiting on zip lock (%llds): %s",
|
||||
(long long)std::chrono::duration_cast<std::chrono::seconds>(kZipPathLockTimeout).count(),
|
||||
outputFolder.c_str());
|
||||
std::unique_lock<std::recursive_timed_mutex> zipGuard(*pathLock, std::defer_lock);
|
||||
if (!zipGuard.try_lock_for(kZipPathLockTimeout)) {
|
||||
ANS_DBG("Extract", "ExtractProtectedZipFile: TIMEOUT acquiring zip lock for %s (zip=%s)",
|
||||
outputFolder.c_str(), zipFileName.c_str());
|
||||
return false;
|
||||
}
|
||||
ANS_DBG("Extract", "ExtractProtectedZipFile: zip lock acquired: %s", outputFolder.c_str());
|
||||
|
||||
int error;
|
||||
if (!FileExist(zipFileName))return false;
|
||||
|
||||
@@ -92,118 +92,5 @@ namespace fs = std::filesystem;
|
||||
//bool ExtractPasswordProtectedZipForTrainingEgnine(const std::string& zipFileName, const std::string& password, const std::string& modelName, std::string& outputFolder, bool edgeDeviceModel = true);
|
||||
ANSLICENSE_API bool ExtractProtectedZipFile(const std::string& zipFileName,const std::string& password,const std::string& modelName,const std::string outputFolder);
|
||||
|
||||
// Per-path mutex for a model folder. Used to serialize extract ↔ session
|
||||
// creation on the same extracted folder so concurrent CreateANSODHandle calls
|
||||
// cannot truncate/rewrite a model file while another thread is loading it.
|
||||
// Keyed by folder path (not zip path) so both extractor and consumer agree.
|
||||
// Returns std::recursive_timed_mutex so callers can bound their wait and
|
||||
// recursion — layered load paths (e.g. ANSALPR_OD::LoadEngine ->
|
||||
// ANSONNXYOLO::LoadModelFromFolder) legitimately re-enter on the same
|
||||
// thread; a non-recursive timed_mutex self-deadlocks that nesting. Cross-
|
||||
// thread serialization is unchanged.
|
||||
ANSLICENSE_API std::shared_ptr<std::recursive_timed_mutex> GetModelFolderLock(const std::string& folderPath);
|
||||
|
||||
// ============================================================================
|
||||
// ModelFolderLockGuard
|
||||
//
|
||||
// RAII guard that serializes "open files in an extracted model folder" against
|
||||
// re-entries into ExtractProtectedZipFile on the SAME folder. Without this,
|
||||
// two threads creating handles for the same model zip can race so that thread
|
||||
// A opens a model file (via ORT / TRT / OpenVINO) while thread B truncates it
|
||||
// via std::ofstream inside the extractor — Windows surfaces this as "system
|
||||
// error number 13" (EACCES) on the reader.
|
||||
//
|
||||
// Backed by GetModelFolderLock() above which returns a process-wide
|
||||
// std::recursive_timed_mutex keyed on the folder path. The extractor takes
|
||||
// the same lock, so extract ↔ open is mutually exclusive across threads,
|
||||
// while same-thread re-entry (layered loaders) is permitted without
|
||||
// deadlocking.
|
||||
//
|
||||
// Acquisition is bounded by `timeout` (default 120 s) so a deadlocked peer
|
||||
// cannot hang the caller thread forever. On timeout, .acquired() is false and
|
||||
// the caller must fail the load.
|
||||
//
|
||||
// Lives in Utility.h (rather than a per-module header) so every ANSCORE
|
||||
// module — ANSODEngine, ANSOCR, ANSLPR, ANSFR, etc. — can use it without
|
||||
// pulling in ANSODEngine headers.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// bool MyEngine::LoadModelFromFolder(...) {
|
||||
// bool result = MyBase::LoadModelFromFolder(...);
|
||||
// if (!result) return false;
|
||||
// // ── serialize derived-class init against concurrent extracts ──
|
||||
// ANSCENTER::ModelFolderLockGuard _flg(_modelFolder,
|
||||
// "MyEngine::LoadModelFromFolder");
|
||||
// if (!_flg.acquired()) {
|
||||
// _logger.LogError("MyEngine::LoadModelFromFolder",
|
||||
// "Timed out waiting for model-folder lock: " + _modelFolder,
|
||||
// __FILE__, __LINE__);
|
||||
// return false;
|
||||
// }
|
||||
// // ... existing body: Init(...) / buildLoadNetwork(...) / etc. ...
|
||||
// }
|
||||
//
|
||||
// Notes:
|
||||
// • Placement — insert AFTER the base's XXX() returns (extractor already
|
||||
// released its own lock by then) and BEFORE any file open in _modelFolder.
|
||||
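// Wrapping the base call is unnecessary — it takes the same lock itself. (The mutex is recursive, so same-thread nesting does not deadlock, but it would hold the lock for longer than needed.)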
|
||||
// • RAII — destructor auto-releases on every return path within the scope.
|
||||
// • Timing — entry/acquire/timeout are traced via ANS_DBG("EngineLoad"),
|
||||
// filter on [EngineLoad] in DebugView to diagnose stalls.
|
||||
// ============================================================================
|
||||
namespace ANSCENTER {
|
||||
class ModelFolderLockGuard {
|
||||
public:
|
||||
explicit ModelFolderLockGuard(const std::string& folderPath,
|
||||
const char* caller,
|
||||
std::chrono::seconds timeout = std::chrono::seconds(120))
|
||||
: _caller(caller ? caller : "(?)"), _folder(folderPath)
|
||||
{
|
||||
if (folderPath.empty()) {
|
||||
// Nothing to serialize — no extracted folder yet. Treat as
|
||||
// acquired so caller's existing init runs unchanged (e.g.
|
||||
// custom-path engines that never go through the zip extractor).
|
||||
_ok = true;
|
||||
ANS_DBG("EngineLoad", "%s: empty folder path, skipping lock",
|
||||
_caller);
|
||||
return;
|
||||
}
|
||||
auto lock = GetModelFolderLock(folderPath);
|
||||
_guard = std::unique_lock<std::recursive_timed_mutex>(*lock, std::defer_lock);
|
||||
ANS_DBG("EngineLoad",
|
||||
"%s: waiting on folder lock (%llds): %s",
|
||||
_caller, (long long)timeout.count(), folderPath.c_str());
|
||||
const auto t0 = std::chrono::steady_clock::now();
|
||||
if (_guard.try_lock_for(timeout)) {
|
||||
const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::steady_clock::now() - t0).count();
|
||||
_ok = true;
|
||||
ANS_DBG("EngineLoad", "%s: folder lock acquired in %lldms",
|
||||
_caller, (long long)ms);
|
||||
} else {
|
||||
const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::steady_clock::now() - t0).count();
|
||||
_ok = false;
|
||||
ANS_DBG("EngineLoad",
|
||||
"%s: TIMEOUT on folder lock after %lldms: %s",
|
||||
_caller, (long long)ms, folderPath.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Non-copyable, non-movable: lock lifetime is tied to this scope.
|
||||
ModelFolderLockGuard(const ModelFolderLockGuard&) = delete;
|
||||
ModelFolderLockGuard& operator=(const ModelFolderLockGuard&) = delete;
|
||||
|
||||
bool acquired() const noexcept { return _ok; }
|
||||
explicit operator bool() const noexcept { return _ok; }
|
||||
|
||||
private:
|
||||
const char* _caller;
|
||||
std::string _folder;
|
||||
std::unique_lock<std::recursive_timed_mutex> _guard;
|
||||
bool _ok = false;
|
||||
};
|
||||
} // namespace ANSCENTER
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user