Fix deadlock
This commit is contained in:
@@ -11,17 +11,23 @@
|
||||
// model folder — without that, thread A can finish extraction and begin opening
|
||||
// train_last.onnx while thread B re-enters extraction and truncates the file,
|
||||
// producing "system error number 13" (EACCES) on the first reader.
|
||||
// Recursive so the same thread can re-acquire the lock through layered load
|
||||
// calls — ANSALPR_OD::LoadEngine -> ANSONNXYOLO::LoadModelFromFolder both
|
||||
// acquire the SAME folder lock on the SAME thread. A non-recursive
|
||||
// timed_mutex blocks that nesting for the full 120 s timeout, then fails. Recursive keeps
|
||||
// cross-thread serialization intact while allowing legitimate re-entry from
|
||||
// the lock-holding thread.
|
||||
static std::mutex g_zipPathMapMutex;
|
||||
static std::map<std::string, std::shared_ptr<std::timed_mutex>> g_zipPathLocks;
|
||||
static std::map<std::string, std::shared_ptr<std::recursive_timed_mutex>> g_zipPathLocks;
|
||||
|
||||
static std::shared_ptr<std::timed_mutex> GetZipPathLock(const std::string& path) {
|
||||
static std::shared_ptr<std::recursive_timed_mutex> GetZipPathLock(const std::string& path) {
|
||||
std::lock_guard<std::mutex> guard(g_zipPathMapMutex);
|
||||
auto& ptr = g_zipPathLocks[path];
|
||||
if (!ptr) ptr = std::make_shared<std::timed_mutex>();
|
||||
if (!ptr) ptr = std::make_shared<std::recursive_timed_mutex>();
|
||||
return ptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath) {
|
||||
std::shared_ptr<std::recursive_timed_mutex> GetModelFolderLock(const std::string& folderPath) {
|
||||
auto ptr = GetZipPathLock(folderPath);
|
||||
ANS_DBG("ModelLock", "GetModelFolderLock: folder=%s mutex=%p",
|
||||
folderPath.c_str(), (void*)ptr.get());
|
||||
@@ -464,7 +470,7 @@ bool AddFolderContentsToZip(zip* archive, const char* folderPath, const char* zi
|
||||
|
||||
bool ZipFolderWithPassword(const char* folderPath, const char* zipFilePath, const char* password) {
|
||||
auto pathLock = GetZipPathLock(std::string(zipFilePath));
|
||||
std::lock_guard<std::timed_mutex> zipGuard(*pathLock);
|
||||
std::lock_guard<std::recursive_timed_mutex> zipGuard(*pathLock);
|
||||
|
||||
zip* zipArchive;
|
||||
zip_flags_t flags = ZIP_CREATE | ZIP_TRUNCATE;
|
||||
@@ -850,7 +856,7 @@ std::string GetDateTimeString(const std::string& format) {
|
||||
bool ExtractProtectedZipFile(const std::string& zipFileName, const std::string& password, const std::string& modelName, const std::string outputFolder)
|
||||
{
|
||||
auto pathLock = GetZipPathLock(outputFolder);
|
||||
std::lock_guard<std::timed_mutex> zipGuard(*pathLock);
|
||||
std::lock_guard<std::recursive_timed_mutex> zipGuard(*pathLock);
|
||||
|
||||
int error;
|
||||
if (!FileExist(zipFileName))return false;
|
||||
|
||||
@@ -96,9 +96,12 @@ namespace fs = std::filesystem;
|
||||
// creation on the same extracted folder so concurrent CreateANSODHandle calls
|
||||
// cannot truncate/rewrite a model file while another thread is loading it.
|
||||
// Keyed by folder path (not zip path) so both extractor and consumer agree.
|
||||
// Returns std::timed_mutex so callers can bound their wait and avoid a hang
|
||||
// if a peer thread deadlocks inside extraction or ORT session creation.
|
||||
ANSLICENSE_API std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath);
|
||||
// Returns std::recursive_timed_mutex so callers can bound their wait and
|
||||
// nest acquisitions — layered load paths (e.g. ANSALPR_OD::LoadEngine ->
|
||||
// ANSONNXYOLO::LoadModelFromFolder) legitimately re-enter on the same
|
||||
// thread; a non-recursive timed_mutex self-deadlocks that nesting. Cross-
|
||||
// thread serialization is unchanged.
|
||||
ANSLICENSE_API std::shared_ptr<std::recursive_timed_mutex> GetModelFolderLock(const std::string& folderPath);
|
||||
|
||||
// ============================================================================
|
||||
// ModelFolderLockGuard
|
||||
@@ -111,8 +114,10 @@ namespace fs = std::filesystem;
|
||||
// error number 13" (EACCES) on the reader.
|
||||
//
|
||||
// Backed by GetModelFolderLock() above which returns a process-wide
|
||||
// std::timed_mutex keyed on the folder path. The extractor takes the same
|
||||
// lock, so extract ↔ open is mutually exclusive.
|
||||
// std::recursive_timed_mutex keyed on the folder path. The extractor takes
|
||||
// the same lock, so extract ↔ open is mutually exclusive across threads,
|
||||
// while same-thread re-entry (layered loaders) is permitted without
|
||||
// deadlocking.
|
||||
//
|
||||
// Acquisition is bounded by `timeout` (default 120 s) so a deadlocked peer
|
||||
// cannot hang the caller thread forever. On timeout, .acquired() is false and
|
||||
@@ -165,7 +170,7 @@ namespace ANSCENTER {
|
||||
return;
|
||||
}
|
||||
auto lock = GetModelFolderLock(folderPath);
|
||||
_guard = std::unique_lock<std::timed_mutex>(*lock, std::defer_lock);
|
||||
_guard = std::unique_lock<std::recursive_timed_mutex>(*lock, std::defer_lock);
|
||||
ANS_DBG("EngineLoad",
|
||||
"%s: waiting on folder lock (%llds): %s",
|
||||
_caller, (long long)timeout.count(), folderPath.c_str());
|
||||
@@ -196,7 +201,7 @@ namespace ANSCENTER {
|
||||
private:
|
||||
const char* _caller;
|
||||
std::string _folder;
|
||||
std::unique_lock<std::timed_mutex> _guard;
|
||||
std::unique_lock<std::recursive_timed_mutex> _guard;
|
||||
bool _ok = false;
|
||||
};
|
||||
} // namespace ANSCENTER
|
||||
|
||||
Reference in New Issue
Block a user