Fix model extract race issue to all classes

This commit is contained in:
2026-04-24 12:19:54 +10:00
parent baa88bcc48
commit e2bf17289d
51 changed files with 1252 additions and 148 deletions

View File

@@ -855,56 +855,104 @@ bool ExtractProtectedZipFile(const std::string& zipFileName, const std::string&
int error;
if (!FileExist(zipFileName))return false;
// Idempotent fast-path: if the target folder already has a complete, fresh
// extraction (at least one non-empty regular file, all >= the zip's mtime),
// skip re-extraction. This prevents redundant passes from concurrent
// CreateANSODHandle calls from truncating files that another thread is
// already mmap'ing via ORT (which surfaces as EACCES / system error 13).
// Idempotent fast-path: skip re-extraction when the target folder already
// holds exactly the content of the current zip. Identity is tracked by a
// small sidecar file (.ans_extract_info) written inside outputFolder after
// every successful extract. The sidecar records three fields — zip path,
// zip size, zip mtime — and we SKIP only if ALL match the current zip AND
// the folder still has at least one non-empty file.
//
// Why sidecar and not just mtime? mtime alone breaks on:
// • downgrades — a reverted older zip has an older mtime, so "file mtime
// >= zip mtime" falsely reports fresh and we never re-extract.
// • preserved-timestamp copies (cp --preserve=timestamps, some installers,
// Git-LFS checkouts) — new-but-older-mtime zips look "not newer".
// Size differs whenever content differs (libzip's CRC/compression shifts
// bytes even for tiny payload changes), so size+mtime+path uniquely pins
// the zip identity in practice.
//
// Any mismatch / missing sidecar / FS error → fall through to full extract,
// which unconditionally rewrites the sidecar at the end. This means the
// first run after deploying this patched binary will do one re-extract per
// model to establish the sidecar; subsequent runs hit the fast path.
const std::filesystem::path sidecarPath =
std::filesystem::path(outputFolder) / ".ans_extract_info";
ANS_DBG("Extract", "ExtractProtectedZipFile: zip=%s -> folder=%s",
zipFileName.c_str(), outputFolder.c_str());
// Capture the current zip's identity up-front so both the fast-path check
// and the post-extract sidecar write see consistent values.
std::uintmax_t curZipSize = 0;
long long curZipMtime = 0;
bool zipStatOk = false;
try {
if (std::filesystem::exists(outputFolder) &&
std::filesystem::is_directory(outputFolder))
curZipSize = std::filesystem::file_size(zipFileName);
curZipMtime = std::filesystem::last_write_time(zipFileName)
.time_since_epoch().count();
zipStatOk = true;
}
catch (const std::exception& ex) {
ANS_DBG("Extract",
"ExtractProtectedZipFile: failed to stat zip (%s); extracting anyway",
ex.what());
}
try {
if (zipStatOk &&
std::filesystem::exists(outputFolder) &&
std::filesystem::is_directory(outputFolder) &&
std::filesystem::exists(sidecarPath))
{
const auto zipTime = std::filesystem::last_write_time(zipFileName);
bool anyFile = false;
bool allFresh = true;
size_t numFiles = 0;
std::string staleFile;
for (const auto& e : std::filesystem::directory_iterator(outputFolder)) {
if (!e.is_regular_file()) continue;
anyFile = true;
++numFiles;
std::error_code ec;
const auto sz = e.file_size(ec);
if (ec || sz == 0) {
allFresh = false;
staleFile = e.path().filename().string() + " (zero/err)";
break;
// Read sidecar: 3 lines — zipPath, zipSize, zipMtime.
std::ifstream sf(sidecarPath);
std::string storedPath;
std::uintmax_t storedSize = 0;
long long storedMtime = 0;
std::getline(sf, storedPath);
sf >> storedSize >> storedMtime;
if (sf.good() || sf.eof()) {
// Sanity: folder must still contain at least one non-empty file
// (defends against a sidecar that outlived its content, e.g. a
// user manually wiping model files but leaving the sidecar).
bool hasNonEmpty = false;
size_t numFiles = 0;
for (const auto& e : std::filesystem::directory_iterator(outputFolder)) {
if (!e.is_regular_file()) continue;
if (e.path().filename() == ".ans_extract_info") continue;
++numFiles;
std::error_code ec;
if (e.file_size(ec) > 0 && !ec) { hasNonEmpty = true; break; }
}
const auto ft = std::filesystem::last_write_time(e.path(), ec);
if (ec || ft < zipTime) {
allFresh = false;
staleFile = e.path().filename().string() + " (older than zip)";
break;
const bool pathOk = (storedPath == zipFileName);
const bool sizeOk = (storedSize == curZipSize);
const bool mtimeOk = (storedMtime == curZipMtime);
if (pathOk && sizeOk && mtimeOk && hasNonEmpty) {
ANS_DBG("Extract",
"ExtractProtectedZipFile: SKIP re-extract — sidecar match (size=%llu mtime=%lld, %zu payload file(s))",
(unsigned long long)curZipSize,
(long long)curZipMtime, numFiles);
return true;
}
ANS_DBG("Extract",
"ExtractProtectedZipFile: full extract needed — sidecar mismatch (path:%d size:%d mtime:%d nonEmpty:%d)",
pathOk ? 0 : 1, sizeOk ? 0 : 1, mtimeOk ? 0 : 1,
hasNonEmpty ? 1 : 0);
} else {
ANS_DBG("Extract",
"ExtractProtectedZipFile: sidecar unreadable, re-extracting");
}
if (anyFile && allFresh) {
ANS_DBG("Extract", "ExtractProtectedZipFile: SKIP re-extract — %zu file(s) already fresh in %s",
numFiles, outputFolder.c_str());
return true; // already extracted and up-to-date
}
} else if (zipStatOk) {
ANS_DBG("Extract",
"ExtractProtectedZipFile: full extract needed — anyFile=%d stale=%s",
anyFile ? 1 : 0,
staleFile.empty() ? "(empty folder)" : staleFile.c_str());
} else {
ANS_DBG("Extract", "ExtractProtectedZipFile: folder absent, full extract");
"ExtractProtectedZipFile: %s, full extract",
std::filesystem::exists(outputFolder) ? "no sidecar yet"
: "folder absent");
}
}
catch (const std::exception& ex) {
// Any filesystem hiccup: fall through to full extraction.
ANS_DBG("Extract", "ExtractProtectedZipFile: freshness check threw, extracting: %s",
// Any filesystem hiccup: fall through to full extraction (safe).
ANS_DBG("Extract",
"ExtractProtectedZipFile: freshness check threw, extracting: %s",
ex.what());
}
@@ -977,6 +1025,35 @@ bool ExtractProtectedZipFile(const std::string& zipFileName, const std::string&
return false;
}
zip_close(archive);
// Extract succeeded — write the identity sidecar so the next call on this
// folder can take the fast-path. We re-read zip stats here (rather than
// trust the pre-extract snapshot) in case the zip was touched in between.
try {
std::uintmax_t finalSize = std::filesystem::file_size(zipFileName);
long long finalMtime = std::filesystem::last_write_time(zipFileName)
.time_since_epoch().count();
std::ofstream sf(sidecarPath, std::ios::binary | std::ios::trunc);
if (sf.is_open()) {
sf << zipFileName << '\n' << finalSize << '\n' << finalMtime << '\n';
ANS_DBG("Extract",
"ExtractProtectedZipFile: sidecar written (size=%llu mtime=%lld) at %s",
(unsigned long long)finalSize, (long long)finalMtime,
sidecarPath.string().c_str());
} else {
ANS_DBG("Extract",
"ExtractProtectedZipFile: could not open sidecar for write: %s",
sidecarPath.string().c_str());
}
}
catch (const std::exception& ex) {
// Non-fatal — extract already succeeded. Next call will just re-extract
// once more until the sidecar can be written.
ANS_DBG("Extract",
"ExtractProtectedZipFile: sidecar write threw (non-fatal): %s",
ex.what());
}
return true;
}