Apply the model-extract race fix to all classes

This commit is contained in:
2026-04-24 12:19:54 +10:00
parent baa88bcc48
commit e2bf17289d
51 changed files with 1252 additions and 148 deletions

View File

@@ -99,4 +99,106 @@ namespace fs = std::filesystem;
// Returns std::timed_mutex so callers can bound their wait and avoid a hang
// if a peer thread deadlocks inside extraction or ORT session creation.
//
// folderPath: path of the extracted model folder that the lock is keyed on.
// Returns:    shared handle to the timed mutex associated with that folder.
//
// NOTE(review): the implementation is not visible from this header. Presumably
// the same folderPath always yields the same mutex and the registry keeps it
// alive for the whole process — confirm. Callers that lock the returned mutex
// should keep the shared_ptr alive for as long as the lock is held.
ANSLICENSE_API std::shared_ptr<std::timed_mutex> GetModelFolderLock(const std::string& folderPath);
// ============================================================================
// ModelFolderLockGuard
//
// RAII guard that serializes "open files in an extracted model folder" against
// re-entries into ExtractProtectedZipFile on the SAME folder. Without this,
// two threads creating handles for the same model zip can race so that thread
// A opens a model file (via ORT / TRT / OpenVINO) while thread B truncates it
// via std::ofstream inside the extractor — Windows surfaces this as "system
// error number 13" (EACCES) on the reader.
//
// Backed by GetModelFolderLock() above which returns a process-wide
// std::timed_mutex keyed on the folder path. The extractor takes the same
// lock, so extract ↔ open is mutually exclusive.
//
// Acquisition is bounded by `timeout` (default 120 s) so a deadlocked peer
// cannot hang the caller thread forever. On timeout, .acquired() is false and
// the caller must fail the load.
//
// Lives in Utility.h (rather than a per-module header) so every ANSCORE
// module — ANSODEngine, ANSOCR, ANSLPR, ANSFR, etc. — can use it without
// pulling in ANSODEngine headers.
//
// Usage:
//
// bool MyEngine::LoadModelFromFolder(...) {
// bool result = MyBase::LoadModelFromFolder(...);
// if (!result) return false;
// // ── serialize derived-class init against concurrent extracts ──
// ANSCENTER::ModelFolderLockGuard _flg(_modelFolder,
// "MyEngine::LoadModelFromFolder");
// if (!_flg.acquired()) {
// _logger.LogError("MyEngine::LoadModelFromFolder",
// "Timed out waiting for model-folder lock: " + _modelFolder,
// __FILE__, __LINE__);
// return false;
// }
// // ... existing body: Init(...) / buildLoadNetwork(...) / etc. ...
// }
//
// Notes:
// • Placement — construct the guard AFTER the base-class load call (e.g.
// MyBase::LoadModelFromFolder() in the usage above) has returned (the
// extractor has already released its own lock by then) and BEFORE any file
// in _modelFolder is opened.
// Wrapping the base call would deadlock — it takes the same lock itself.
// • RAII — destructor auto-releases on every return path within the scope.
// • Timing — entry/acquire/timeout are traced via ANS_DBG("EngineLoad"),
// filter on [EngineLoad] in DebugView to diagnose stalls.
// ============================================================================
namespace ANSCENTER {
    /// Scope guard that holds the per-folder model lock obtained from
    /// GetModelFolderLock() for as long as the guard object lives.
    ///
    /// The constructor waits at most `timeout` for the lock. Callers must check
    /// acquired() (or the explicit bool conversion) and fail the load when it is
    /// false. The lock, if held, is released when the guard is destroyed.
    ///
    /// The guard keeps its own reference to the mutex, so the mutex cannot be
    /// destroyed while this guard still has it locked, regardless of how the
    /// lock registry manages its entries.
    class ModelFolderLockGuard {
    public:
        /// @param folderPath Extracted model folder to serialize on. An empty
        ///                   path means "nothing to lock": the guard reports
        ///                   acquired() == true without taking any mutex.
        /// @param caller     Label used in the [EngineLoad] trace lines; a null
        ///                   pointer is replaced by "(?)". The pointer itself is
        ///                   stored, not copied, so it must outlive the guard.
        /// @param timeout    Upper bound on the wait for the lock (default 120 s).
        explicit ModelFolderLockGuard(const std::string& folderPath,
                                      const char* caller,
                                      std::chrono::seconds timeout = std::chrono::seconds(120))
            : _caller(caller ? caller : "(?)"), _folder(folderPath)
        {
            if (folderPath.empty()) {
                // Nothing to serialize — no extracted folder yet. Treat as
                // acquired so caller's existing init runs unchanged (e.g.
                // custom-path engines that never go through the zip extractor).
                _ok = true;
                ANS_DBG("EngineLoad", "%s: empty folder path, skipping lock",
                        _caller);
                return;
            }

            // Keep the shared_ptr as a member: _guard only stores a raw pointer
            // to the mutex, so dropping this reference would leave the mutex's
            // lifetime entirely up to whoever else happens to own it.
            _lock = GetModelFolderLock(folderPath);
            if (!_lock) {
                // Fail closed instead of dereferencing a null handle; the
                // caller sees acquired() == false and aborts the load.
                _ok = false;
                ANS_DBG("EngineLoad", "%s: no folder lock available for: %s",
                        _caller, folderPath.c_str());
                return;
            }

            _guard = std::unique_lock<std::timed_mutex>(*_lock, std::defer_lock);
            ANS_DBG("EngineLoad",
                    "%s: waiting on folder lock (%llds): %s",
                    _caller, static_cast<long long>(timeout.count()), folderPath.c_str());

            const auto t0 = std::chrono::steady_clock::now();
            _ok = _guard.try_lock_for(timeout);
            const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                std::chrono::steady_clock::now() - t0).count();

            if (_ok) {
                ANS_DBG("EngineLoad", "%s: folder lock acquired in %lldms",
                        _caller, static_cast<long long>(ms));
            } else {
                ANS_DBG("EngineLoad",
                        "%s: TIMEOUT on folder lock after %lldms: %s",
                        _caller, static_cast<long long>(ms), folderPath.c_str());
            }
        }

        // Non-copyable, non-movable: lock lifetime is tied to this scope.
        // (Declaring the copy operations suppresses the implicit move ones.)
        ModelFolderLockGuard(const ModelFolderLockGuard&) = delete;
        ModelFolderLockGuard& operator=(const ModelFolderLockGuard&) = delete;

        /// True when the lock is held, or when folderPath was empty and no
        /// lock was needed. False on timeout or when no lock could be obtained.
        bool acquired() const noexcept { return _ok; }

        /// Same value as acquired().
        explicit operator bool() const noexcept { return _ok; }

    private:
        const char* _caller;
        std::string _folder;
        // Declared BEFORE _guard on purpose: members are destroyed in reverse
        // declaration order, so _guard unlocks the mutex first and only then is
        // this reference to the mutex dropped.
        std::shared_ptr<std::timed_mutex> _lock;
        std::unique_lock<std::timed_mutex> _guard;
        bool _ok = false;
    };
} // namespace ANSCENTER
#endif