From 053bd74aa31d7e182ea3db939124ac4dc0ae6ba7 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 21:21:03 +0100 Subject: [PATCH 01/12] Windows/Common: Add ScopedHandle::reset() --- Source/Windows/Common/Handle.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Windows/Common/Handle.h b/Source/Windows/Common/Handle.h index 8424c56928..fd5f11ec0f 100644 --- a/Source/Windows/Common/Handle.h +++ b/Source/Windows/Common/Handle.h @@ -20,9 +20,14 @@ class ScopedHandle final { } ~ScopedHandle() { + reset(INVALID_HANDLE_VALUE); + } + + void reset(HANDLE NewHandle = INVALID_HANDLE_VALUE) { if (Handle != INVALID_HANDLE_VALUE) { NtClose(Handle); } + Handle = NewHandle; } const HANDLE& operator*() const { From 82030b8286ede1131f28d39dfe24d0bbeabb8651 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 21:21:38 +0100 Subject: [PATCH 02/12] Windows: Declare winternl relocation APIs --- Source/Windows/include/winternl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Windows/include/winternl.h b/Source/Windows/include/winternl.h index 03f3c4497b..4218575bc6 100644 --- a/Source/Windows/include/winternl.h +++ b/Source/Windows/include/winternl.h @@ -505,6 +505,7 @@ NTSTATUS WINAPI LdrDisableThreadCalloutsForDll(HMODULE); NTSTATUS WINAPI LdrGetDllFullName(HMODULE, UNICODE_STRING*); NTSTATUS WINAPI LdrGetDllHandle(LPCWSTR, ULONG, const UNICODE_STRING*, HMODULE*); NTSTATUS WINAPI LdrGetProcedureAddress(HMODULE, const ANSI_STRING*, ULONG, void**); +IMAGE_BASE_RELOCATION* WINAPI LdrProcessRelocationBlock(ULONG_PTR, ULONG, USHORT*, INT_PTR); NTSTATUS WINAPI NtAllocateVirtualMemoryEx(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG); NTSTATUS WINAPI NtAllocateVirtualMemory(HANDLE, PVOID*, ULONG_PTR, SIZE_T*, ULONG, ULONG); NTSTATUS WINAPI NtContinue(PCONTEXT, BOOLEAN); @@ -538,6 +539,7 @@ ULONG WINAPI RtlFindClearBitsAndSet(PRTL_BITMAP, ULONG, ULONG); ULONG WINAPI RtlGetCurrentDirectory_U(ULONG, LPWSTR); 
PIMAGE_NT_HEADERS WINAPI RtlImageNtHeader(HMODULE); PVOID WINAPI RtlImageDirectoryEntryToData(HMODULE, BOOL, WORD, ULONG*); +PIMAGE_SECTION_HEADER WINAPI RtlImageRvaToSection(const IMAGE_NT_HEADERS*, HMODULE, DWORD); void WINAPI RtlInitializeConditionVariable(RTL_CONDITION_VARIABLE*); NTSTATUS WINAPI RtlInitializeCriticalSection(RTL_CRITICAL_SECTION*); void WINAPI RtlInitializeSRWLock(RTL_SRWLOCK*); From 23de875516807f7afb3ce471148eeb1de0795323 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 23:06:02 +0100 Subject: [PATCH 03/12] Windows: Add NtUnmapViewOfSection prototype --- Source/Windows/include/winternl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Windows/include/winternl.h b/Source/Windows/include/winternl.h index 4218575bc6..2ad0970fc3 100644 --- a/Source/Windows/include/winternl.h +++ b/Source/Windows/include/winternl.h @@ -517,6 +517,7 @@ NTSTATUS WINAPI NtFreeVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG); NTSTATUS WINAPI NtGetContextThread(HANDLE, CONTEXT*); ULONG WINAPI NtGetCurrentProcessorNumber(void); NTSYSAPI NTSTATUS WINAPI NtMapViewOfSection(HANDLE, HANDLE, PVOID*, ULONG_PTR, SIZE_T, const LARGE_INTEGER*, SIZE_T*, SECTION_INHERIT, ULONG, ULONG); +NTSYSAPI NTSTATUS WINAPI NtUnmapViewOfSection(HANDLE, PVOID); NTSTATUS WINAPI NtOpenKeyEx(PHANDLE, ACCESS_MASK, const OBJECT_ATTRIBUTES*, ULONG); NTSTATUS WINAPI NtProtectVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG*); NTSTATUS WINAPI NtQueryAttributesFile(const OBJECT_ATTRIBUTES*, FILE_BASIC_INFORMATION*); From 942f34369de9834d80161591f97dc3ea01455dbb Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 13 Apr 2026 19:01:16 +0200 Subject: [PATCH 04/12] CodeCache: Align code section within cache files This allows mapping the code directly into memory for execution. 
--- FEXCore/Source/Interface/Core/CodeCache.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/FEXCore/Source/Interface/Core/CodeCache.cpp b/FEXCore/Source/Interface/Core/CodeCache.cpp index fcbd83fb3f..b80d0d5a02 100644 --- a/FEXCore/Source/Interface/Core/CodeCache.cpp +++ b/FEXCore/Source/Interface/Core/CodeCache.cpp @@ -233,7 +233,10 @@ uint64_t CodeCache::ComputeCodeMapId(std::string_view Filename, int FD) { struct CodeCacheHeader { std::array Magic = ExpectedMagic; - uint32_t FormatVersion = 1; + // Version history: + // 1: Initial version + // 2: Padding code buffer data to enable direct mapping + uint32_t FormatVersion = 2; uint8_t FEXVersion[20] = {}; uint32_t NumBlocks; uint32_t NumCodePages; @@ -260,7 +263,7 @@ bool CodeCache::SaveData(Core::InternalThreadState& Thread, int fd, const Execut std::ranges::copy(GIT_HASH, header.FEXVersion); header.NumBlocks = LookupCache.BlockList.size(); header.NumCodePages = LookupCache.CodePages.size(); - header.CodeBufferSize = CTX.LatestOffset; + header.CodeBufferSize = FEXCore::AlignUp(CTX.LatestOffset, Utils::FEX_PAGE_SIZE); header.NumRelocations = Relocations.size(); header.SerializedBaseAddress = SerializedBaseAddress; ::write(fd, &header, sizeof(header)); @@ -313,6 +316,12 @@ bool CodeCache::SaveData(Core::InternalThreadState& Thread, int fd, const Execut return false; } ::write(fd, CodeBufferData.data(), CodeBufferData.size()); + // Pad to next page in file for mmap + { + auto PaddedSize = AlignUp(lseek(fd, 0, SEEK_CUR), Utils::FEX_PAGE_SIZE); + ::ftruncate(fd, PaddedSize); + lseek(fd, PaddedSize, SEEK_SET); + } // Dump code pages static_assert(OrderedContainer, "Non-deterministic data source"); From c274b14a3af9504df828fd6cc5a2667f591c2440 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 14 Apr 2026 16:39:29 +0200 Subject: [PATCH 05/12] LinuxSyscalls: Defer MappedResource deletion until after code invalidation This ensures that any code buffer memory owned by the 
MappedResource is invalidated before being deallocated. --- .../LinuxEmulation/LinuxSyscalls/Syscalls.h | 6 +++- .../LinuxSyscalls/SyscallsSMCTracking.cpp | 32 ++++++++++++------- .../LinuxSyscalls/SyscallsVMATracking.cpp | 19 +++++++++-- .../LinuxSyscalls/SyscallsVMATracking.h | 10 ++++++ 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h index dd5b28bf9a..9810099ef2 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h @@ -261,10 +261,14 @@ class SyscallHandler : public FEXCore::HLE::SyscallHandler, void TrackMprotect(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t len, int prot); void TrackMadvise(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size, int advice); - void InvalidateCodeRangeIfNecessary(FEXCore::Core::InternalThreadState* Thread, uint64_t Base, uint64_t Length) { + void InvalidateCodeRangeIfNecessary(FEXCore::Core::InternalThreadState* Thread, uint64_t Base, uint64_t Length, bool CheckPendingVMAResources) { if (SMCChecks != FEXCore::Config::CONFIG_SMC_NONE) { TM.InvalidateGuestCodeRange(Thread, Base, Length); } + if (CheckPendingVMAResources) { + auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); + VMATracking.FlushPendingResourceDeletions(); + } } void InvalidateCodeRangeIfNecessaryOnRemap(FEXCore::Core::InternalThreadState* Thread, uint64_t OldAddress, uint64_t NewAddress, diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp index 69bf51c4f6..2289281886 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp @@ -170,7 +170,7 @@ void SyscallHandler::MarkGuestExecutableRange(FEXCore::Core::InternalThreadState } 
void SyscallHandler::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) { - InvalidateCodeRangeIfNecessary(Thread, Start, Length); + InvalidateCodeRangeIfNecessary(Thread, Start, Length, false); } static FEXCore::ExecutableFileSectionInfo BuildSectionInfo(const VMATracking::MappedResource& Resource, uint64_t Base, uint64_t Size) { @@ -263,11 +263,12 @@ void* SyscallHandler::GuestMmap(bool Is64Bit, FEXCore::Core::InternalThreadState int fd, off_t offset) { LOGMAN_THROW_A_FMT(Is64Bit || (length >> 32) == 0, "values must fit to 32 bits"); - uint64_t Result {}; + uint64_t Result; size_t Size = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE); std::optional LateMetadata = std::nullopt; std::optional CachedSection; + bool PendingResourceDeletion; { // NOTE: Frontend calls this with a nullptr Thread during initialization, but @@ -290,9 +291,10 @@ void* SyscallHandler::GuestMmap(bool Is64Bit, FEXCore::Core::InternalThreadState } LateMetadata = TrackMmap(Thread, Result, length, prot, flags, fd, offset, CachedSection); + PendingResourceDeletion = VMATracking.HasPendingResourceDeletions(); } - InvalidateCodeRangeIfNecessary(Thread, Result, Size); + InvalidateCodeRangeIfNecessary(Thread, Result, Size, PendingResourceDeletion); if (LateMetadata) { auto CodeInvalidationlk = FEXCore::GuardSignalDeferringSectionWithFallback(CTX->GetCodeInvalidationMutex(), Thread); @@ -310,8 +312,9 @@ uint64_t SyscallHandler::GuestMunmap(bool Is64Bit, FEXCore::Core::InternalThread LOGMAN_THROW_A_FMT(Is64Bit || (reinterpret_cast(addr) >> 32) == 0, "values must fit to 32 bits: {}", fmt::ptr(addr)); LOGMAN_THROW_A_FMT(Is64Bit || (length >> 32) == 0, "values must fit to 32 bits"); - uint64_t Result {}; + uint64_t Result; uint64_t Size = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE); + bool PendingResourceDeletion; { // Frontend calls this with nullptr Thread during initialization. 
@@ -331,8 +334,9 @@ uint64_t SyscallHandler::GuestMunmap(bool Is64Bit, FEXCore::Core::InternalThread } } TrackMunmap(Thread, addr, length); + PendingResourceDeletion = VMATracking.HasPendingResourceDeletions(); } - InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast(addr), Size); + InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast(addr), Size, PendingResourceDeletion); if (length) { auto CodeInvalidationlk = FEXCore::GuardSignalDeferringSectionWithFallback(CTX->GetCodeInvalidationMutex(), Thread); @@ -433,7 +437,7 @@ uint64_t SyscallHandler::GuestMprotect(FEXCore::Core::InternalThreadState* Threa TrackMprotect(Thread, addr, len, prot); } - InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast(addr), len); + InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast(addr), len, false); // Prepare for delayed code cache load after ld/Wine is done applying relocations. // Hooking into mprotect is a reliable heuristic that matches behavior of ld (for ELF) and Wine (for PE). @@ -466,8 +470,9 @@ uint64_t SyscallHandler::GuestMprotect(FEXCore::Core::InternalThreadState* Threa } uint64_t SyscallHandler::GuestShmat(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, int shmid, const void* shmaddr, int shmflg) { - uint64_t Result {}; - uint64_t Length {}; + uint64_t Result; + uint64_t Length; + bool PendingResourceDeletion; { auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); @@ -492,15 +497,17 @@ uint64_t SyscallHandler::GuestShmat(bool Is64Bit, FEXCore::Core::InternalThreadS Length = stat.shm_segsz; TrackShmat(Thread, shmid, Result, shmflg, Length); + PendingResourceDeletion = VMATracking.HasPendingResourceDeletions(); } - InvalidateCodeRangeIfNecessary(Thread, Result, Length); + InvalidateCodeRangeIfNecessary(Thread, Result, Length, PendingResourceDeletion); return Result; } uint64_t SyscallHandler::GuestShmdt(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, const void* shmaddr) { - uint64_t Result {}; - uint64_t Length {}; 
+ uint64_t Result; + uint64_t Length; + bool PendingResourceDeletion; { auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); if (Is64Bit) { @@ -516,9 +523,10 @@ uint64_t SyscallHandler::GuestShmdt(bool Is64Bit, FEXCore::Core::InternalThreadS } Length = TrackShmdt(Thread, reinterpret_cast(shmaddr)); + PendingResourceDeletion = VMATracking.HasPendingResourceDeletions(); } - InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast(shmaddr), Length); + InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast(shmaddr), Length, PendingResourceDeletion); return Result; } diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp index b2bb4836a5..ad5146cde0 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp @@ -235,7 +235,13 @@ void VMATracking::DeleteVMARange(FEXCore::Context::Context* CTX, uintptr_t Base, // If linked to a Mapped Resource, remove from linked list and possibly delete the Mapped Resource if (Current->Resource) { if (ListRemove(Current) && Current->Resource != PreservedMappedResource) { - MappedResources.erase(Current->Resource->Iterator); + auto Iter = Current->Resource->Iterator; + // Defer deletion if the resource has mapped code cache data, so its code buffer + // outlives code cache invalidation (which runs after the VMA lock is released). + if (false) { // TODO: Consider unconditionally deferring deletion? 
+ PendingResourceDeletions.push_back(std::move(*Current->Resource)); + } + MappedResources.erase(Iter); } } @@ -277,6 +283,11 @@ void VMATracking::DeleteVMARange(FEXCore::Context::Context* CTX, uintptr_t Base, } } +void VMATracking::FlushPendingResourceDeletions() { + Mutex.check_lock_owned_by_self_as_write(); + PendingResourceDeletions.clear(); +} + // Change flags of mappings in a range and split the mappings if needed void VMATracking::ChangeProtectionFlags(uintptr_t Base, uintptr_t Length, VMAProt NewProt) { Mutex.check_lock_owned_by_self_as_write(); @@ -542,7 +553,11 @@ uintptr_t VMATracking::DeleteSHMRegion(FEXCore::Context::Context* CTX, uintptr_t do { if (Entry->second.Resource == Resource) { if (ListRemove(&Entry->second)) { - MappedResources.erase(Entry->second.Resource->Iterator); + auto Iter = Entry->second.Resource->Iterator; + if (false) { // TODO: Consider unconditionally deferring deletion? + PendingResourceDeletions.push_back(std::move(*Entry->second.Resource)); + } + MappedResources.erase(Iter); } Entry = VMAs.erase(Entry); } else { diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h index 00da451679..e6f7f826b9 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h @@ -136,8 +136,18 @@ struct VMATracking { return MappedResources.equal_range(mrid); } + bool HasPendingResourceDeletions() const { + return !PendingResourceDeletions.empty(); + } + + // Flush pending MappedResource deletions. This must be called after code + // invalidation related to unmapped/remapped memory to avoid memory leaks. 
+ // - Mutex must be unique_locked before calling + void FlushPendingResourceDeletions(); + private: MappedResource::ContainerType MappedResources; + fextl::vector PendingResourceDeletions; }; From 0f9f7300e5623d9812f08d139aef4f0f660dae63 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 22 Apr 2026 14:44:19 +0200 Subject: [PATCH 06/12] CodeCache: Implement lazy code loading --- .../Source/Interface/Config/Config.json.in | 7 + FEXCore/Source/Interface/Context/Context.cpp | 2 +- FEXCore/Source/Interface/Context/Context.h | 8 +- FEXCore/Source/Interface/Core/CodeCache.cpp | 482 ++++++++++++------ FEXCore/Source/Interface/Core/Core.cpp | 1 - FEXCore/Source/Interface/Core/LookupCache.cpp | 3 + FEXCore/include/FEXCore/Core/CodeCache.h | 91 +++- .../include/FEXCore/Utils/SignalScopeGuards.h | 3 + .../LinuxEmulation/LinuxSyscalls/Syscalls.cpp | 18 +- .../LinuxEmulation/LinuxSyscalls/Syscalls.h | 5 +- .../LinuxSyscalls/SyscallsSMCTracking.cpp | 95 +++- .../LinuxSyscalls/SyscallsVMATracking.cpp | 28 +- .../LinuxSyscalls/SyscallsVMATracking.h | 19 +- Source/Windows/Common/ImageTracker.cpp | 3 +- 14 files changed, 563 insertions(+), 202 deletions(-) diff --git a/FEXCore/Source/Interface/Config/Config.json.in b/FEXCore/Source/Interface/Config/Config.json.in index a3ddd6de8f..fa56db44c2 100644 --- a/FEXCore/Source/Interface/Config/Config.json.in +++ b/FEXCore/Source/Interface/Config/Config.json.in @@ -23,6 +23,13 @@ "Enable the code caching subsystem" ] }, + "EnableLazyCodeCachingWIP": { + "Type": "bool", + "Default": "false", + "Desc": [ + "Enable lazy loading of chunks in code caches" + ] + }, "EnableCodeCacheValidation": { "Type": "bool", "Default": "false", diff --git a/FEXCore/Source/Interface/Context/Context.cpp b/FEXCore/Source/Interface/Context/Context.cpp index d4af939cc5..de8415b0af 100644 --- a/FEXCore/Source/Interface/Context/Context.cpp +++ b/FEXCore/Source/Interface/Context/Context.cpp @@ -53,6 +53,6 @@ FEXCore::CPUID::FunctionResults 
FEXCore::Context::ContextImpl::RunCPUIDFunctionN } bool FEXCore::Context::ContextImpl::IsAddressInCodeBuffer(FEXCore::Core::InternalThreadState* Thread, uintptr_t Address) const { - return Thread->CPUBackend->IsAddressInCodeBuffer(Address); + return Thread->CPUBackend->IsAddressInCodeBuffer(Address) || CodeCache.IsAddressInMappedCodeBuffer(Address); } } // namespace FEXCore::Context diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index e195f6b9e8..b3868ca224 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -76,11 +76,17 @@ class CodeCache : public AbstractCodeCache { bool IsGeneratingCache = false; FEX_CONFIG_OPT(EnableCodeCaching, ENABLECODECACHINGWIP); + FEX_CONFIG_OPT(EnableLazyCodeCaching, ENABLELAZYCODECACHINGWIP); FEX_CONFIG_OPT(EnableCodeCacheValidation, ENABLECODECACHEVALIDATION); uint64_t ComputeCodeMapId(std::string_view Filename, int FD) override; bool SaveData(Core::InternalThreadState&, int TargetFD, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress) override; - bool LoadData(Core::InternalThreadState*, std::byte* MappedCacheFile, const ExecutableFileSectionInfo&) override; + + fextl::unique_ptr LoadCache(std::span CacheFile, const ExecutableFileInfo&, uint64_t FileStartVA) override; + + bool EnableLoadedSection(Core::InternalThreadState*, MappedCodeCacheFile&, const ExecutableFileSectionInfo&) override; + + void FinalizeCodePages(MappedCodeCacheFile&, std::span CodeRange) override; /** * Performs expensive extra validation on the loaded code cache data. 
diff --git a/FEXCore/Source/Interface/Core/CodeCache.cpp b/FEXCore/Source/Interface/Core/CodeCache.cpp index b80d0d5a02..9d23b311f5 100644 --- a/FEXCore/Source/Interface/Core/CodeCache.cpp +++ b/FEXCore/Source/Interface/Core/CodeCache.cpp @@ -1,4 +1,9 @@ // SPDX-License-Identifier: MIT +#include "FEXCore/Utils/LogManager.h" +#include "FEXCore/Utils/MathUtils.h" +#include "FEXCore/Utils/TypeDefines.h" +#include "FEXCore/fextl/memory.h" +#include #include #include @@ -16,10 +21,14 @@ #include +#include #include +#include #include +#include + #include namespace FEXCore { @@ -32,6 +41,32 @@ ExecutableFileInfo::ExecutableFileInfo(fextl::unique_ptr Map #endif ExecutableFileInfo::~ExecutableFileInfo() = default; +MappedCodeCacheFile::~MappedCodeCacheFile() { + if (CacheManager) { + CacheManager->UnregisterMappedCodeBuffer(*this); + } +} + +void AbstractCodeCache::RegisterMappedCodeBuffer(MappedCodeCacheFile& Code) { + MappedCodeBuffers.push_back(Code.CodeBuffer); + // Unregister on destruction of Code + Code.CacheManager = this; +} + +void AbstractCodeCache::UnregisterMappedCodeBuffer(MappedCodeCacheFile& Code) { + std::erase_if(MappedCodeBuffers, [&](const auto& Elem) { return Elem.data() == Code.CodeBuffer.data(); }); +} + +bool AbstractCodeCache::IsAddressInMappedCodeBuffer(uintptr_t Address) const { + for (const auto& Range : MappedCodeBuffers) { + auto Start = reinterpret_cast(Range.data()); + if (Address >= Start && Address < Start + Range.size_bytes()) { + return true; + } + } + return false; +} + fextl::string CodeMap::GetBaseFilename(const ExecutableFileInfo& MainExecutable, bool AddNombSuffix) { auto FileId = MainExecutable.FileId; @@ -339,167 +374,6 @@ bool CodeCache::SaveData(Core::InternalThreadState& Thread, int fd, const Execut return true; } -bool CodeCache::LoadData(Core::InternalThreadState* Thread, std::byte* MappedCacheFile, const ExecutableFileSectionInfo& BinarySection) { - if (!EnableCodeCaching) { - return true; - } - - namespace ranges = 
std::ranges; - - // Read file header - CodeCacheHeader header {}; - ::memcpy(&header, MappedCacheFile, sizeof(header)); - MappedCacheFile += sizeof(header); - - LogMan::Msg::IFmt("Cache load: {:5} blocks; base={:#14x}; off={:#9x}-{:#09x}; {:016x} {}", header.NumBlocks, BinarySection.FileStartVA, - BinarySection.BeginVA - BinarySection.FileStartVA, BinarySection.EndVA - BinarySection.FileStartVA, - BinarySection.FileInfo.FileId, BinarySection.FileInfo.Filename); - - if (!ranges::equal(header.Magic, header.ExpectedMagic)) { - LogMan::Msg::EFmt("Invalid cache file header"); - return false; - } - - if (!ranges::equal(header.FEXVersion, GIT_HASH)) { - LogMan::Msg::IFmt("Cache generated from old FEX version {:02x}, current is {:02x}; skipping", fmt::join(header.FEXVersion, ""), - fmt::join(GIT_HASH, "")); - return false; - } - - if (header.NumBlocks == 0) { - // Valid caches are never empty - LogMan::Msg::IFmt("Code cache empty, aborting"); - return false; - } - - // Read guest<->host block mappings - using BlockListEntry = decltype(GuestToHostMap::BlockList)::value_type; - fextl::vector BlockList(header.NumBlocks); - { - for (auto& BlockPtr : BlockList) { - ::memcpy(&BlockPtr.first, MappedCacheFile, sizeof(BlockPtr.first)); - MappedCacheFile += sizeof(BlockPtr.first); - ::memcpy(&BlockPtr.second.HostCode, MappedCacheFile, sizeof(BlockPtr.second.HostCode)); - MappedCacheFile += sizeof(BlockPtr.second.HostCode); - uint64_t NumGuestPages; - ::memcpy(&NumGuestPages, MappedCacheFile, sizeof(NumGuestPages)); - MappedCacheFile += sizeof(NumGuestPages); - - BlockPtr.second.CodePages.resize(NumGuestPages); - ::memcpy(BlockPtr.second.CodePages.data(), MappedCacheFile, std::span {BlockPtr.second.CodePages}.size_bytes()); - MappedCacheFile += std::span {BlockPtr.second.CodePages}.size_bytes(); - } - - // Constrain BlockList to the given ExecutableFileSectionInfo - LOGMAN_THROW_A_FMT(ranges::is_sorted(BlockList, [](auto& a, auto& b) { return a.first < b.first; }), "Expected sorted 
block list"); - auto begin = ranges::lower_bound(BlockList, BinarySection.BeginVA - BinarySection.FileStartVA, std::less {}, &BlockListEntry::first); - auto end = - ranges::upper_bound(begin, BlockList.end(), BinarySection.EndVA - BinarySection.FileStartVA - 1, std::less {}, &BlockListEntry::first); - if (begin == end) { - // Not an error since there is just no data to load - LogMan::Msg::IFmt("No blocks cached in this range, aborting"); - return true; - } - BlockList.erase(end, BlockList.end()); - BlockList.erase(BlockList.begin(), begin); - } - - // Read relocations - fextl::vector Relocations(header.NumRelocations, FEXCore::CPU::Relocation::Default()); - ::memcpy(Relocations.data(), MappedCacheFile, Relocations.size() * sizeof(Relocations[0])); - MappedCacheFile += Relocations.size() * sizeof(Relocations[0]); - - // Pad to next page in file, which contains CodeBuffer data - MappedCacheFile = reinterpret_cast(AlignUp(reinterpret_cast(MappedCacheFile), Utils::FEX_PAGE_SIZE)); - - // Prepare CodeBuffer: Page aligned and big enough to hold all cached data - auto Lock = std::unique_lock {CTX.CodeBufferWriteMutex}; - if (Thread) { - if (auto Prev = Thread->CPUBackend->CheckCodeBufferUpdate()) { - Allocator::VirtualDontNeed(Thread->CallRetStackBase, FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE); - auto lk = Thread->LookupCache->AcquireWriteLock(); - Thread->LookupCache->ChangeGuestToHostMapping(*Prev, *CTX.GetLatest()->LookupCache, lk); - } - } - - auto CodeBuffer = CTX.GetLatest(); - LOGMAN_THROW_A_FMT(reinterpret_cast(CodeBuffer->Ptr) % 0x1000 == 0, "Expected CodeBuffer base to be page-aligned"); - const auto Delta = AlignUp(CTX.LatestOffset, 0x1000) - CTX.LatestOffset; - CTX.LatestOffset += Delta; - - while (CTX.LatestOffset + header.CodeBufferSize > CodeBuffer->UsableSize()) { - if (Thread) { - CTX.ClearCodeCache(Thread); - CodeBuffer = CTX.GetLatest(); - LogMan::Msg::IFmt("Increased code buffer size to {} MiB for cache load", CodeBuffer->AllocatedSize / 
1024 / 1024); - } else { - ERROR_AND_DIE_FMT("Cannot extend codebuffer without thread!"); - } - } - - // Read CodeBuffer data from file. Make sure the destination is page-aligned. - // TODO: Only load the data needed for the selected section - auto CodeBufferRange = - std::as_writable_bytes(std::span {CodeBuffer->Ptr, CodeBuffer->UsableSize()}).subspan(CTX.LatestOffset, header.CodeBufferSize); - ::memcpy(CodeBufferRange.data(), MappedCacheFile, header.CodeBufferSize); - MappedCacheFile += header.CodeBufferSize; - CTX.LatestOffset += header.CodeBufferSize; - - // Apply FEX relocations - auto Ret = ApplyCodeRelocations(BinarySection.FileStartVA, CodeBufferRange, Relocations, false); - LOGMAN_THROW_A_FMT(Ret == true, "Failed to apply code cache relocations"); - - { - auto& LookupCache = *CodeBuffer->LookupCache; - auto WriteLock = LookupCache.AcquireWriteLock(); - - // Register blocks to LookupCache - for (auto& [Guest, Host] : BlockList) { - for (auto& CodePage : Host.CodePages) { - CodePage += BinarySection.FileStartVA; - } - auto HostCode = reinterpret_cast(Host.HostCode + reinterpret_cast(CodeBufferRange.data())); - LookupCache.AddBlockMapping(Guest + BinarySection.FileStartVA, std::move(Host.CodePages), HostCode, WriteLock); - } - - // Register loaded code ranges - fextl::vector Entrypoints; - for (uint32_t i = 0; i < header.NumCodePages; ++i) { - uint64_t CodePage; - memcpy(&CodePage, MappedCacheFile, sizeof(CodePage)); - CodePage += BinarySection.FileStartVA; - MappedCacheFile += sizeof(CodePage); - - uint64_t NumEntrypoints; - memcpy(&NumEntrypoints, MappedCacheFile, sizeof(NumEntrypoints)); - MappedCacheFile += sizeof(NumEntrypoints); - - Entrypoints.resize(NumEntrypoints); - memcpy(Entrypoints.data(), MappedCacheFile, NumEntrypoints * sizeof(Entrypoints[0])); - MappedCacheFile += NumEntrypoints * sizeof(Entrypoints[0]); - for (auto& Entrypoint : Entrypoints) { - Entrypoint += BinarySection.FileStartVA; - } - - if 
(LookupCache.AddBlockExecutableRange(Entrypoints, CodePage, FEXCore::Utils::FEX_PAGE_SIZE, WriteLock)) { - CTX.SyscallHandler->MarkGuestExecutableRange(Thread, CodePage, FEXCore::Utils::FEX_PAGE_SIZE); - } - } - } - - if (EnableCodeCacheValidation) { - fextl::set GuestBlocks, HostBlocks; - for (auto& [Guest, Host] : BlockList) { - GuestBlocks.insert(Guest + BinarySection.FileStartVA); - HostBlocks.insert(Host.HostCode); - } - - Validate(BinarySection, std::move(GuestBlocks), HostBlocks, CodeBufferRange); - } - - return true; -} - void CodeCache::Validate(const ExecutableFileSectionInfo& Section, fextl::set GuestBlocks, const fextl::set& HostBlocks, std::span CachedCode) { LOGMAN_THROW_A_FMT(!HostBlocks.empty(), "Tried to validate without any host blocks"); @@ -612,7 +486,7 @@ void CodeCache::Validate(const ExecutableFileSectionInfo& Section, fextl::setLookupCache->ClearCache(ValidationThread->LookupCache->AcquireWriteLock()); ValidationCTX->LatestOffset = 0; - LogMan::Msg::IFmt("\tSuccessfully validated cache"); + LogMan::Msg::IFmt(" successfully validated cache"); } bool CodeCache::ApplyCodeRelocations(uint64_t GuestEntry, std::span Code, @@ -659,4 +533,288 @@ bool CodeCache::ApplyCodeRelocations(uint64_t GuestEntry, std::span C return true; } +fextl::unique_ptr +CodeCache::LoadCache(std::span CacheFile, const ExecutableFileInfo& FileInfo, uint64_t FileStartVA) { + if (!EnableCodeCaching) { + return nullptr; + } + + FEXCORE_PROFILE_SCOPED("LoadCache"); + + // Read file header + CodeCacheHeader header {}; + ::memcpy(&header, CacheFile.data(), sizeof(header)); + + if (!std::ranges::equal(header.Magic, header.ExpectedMagic)) { + LogMan::Msg::EFmt("Invalid cache file header"); + return nullptr; + } + + if (!std::ranges::equal(header.FEXVersion, GIT_HASH)) { + LogMan::Msg::IFmt("Cache generated from old FEX version {:02x}, current is {:02x}; skipping", fmt::join(header.FEXVersion, ""), + fmt::join(GIT_HASH, "")); + return nullptr; + } + + if (header.NumBlocks == 0) { + 
// Valid caches are never empty + LogMan::Msg::IFmt("Code cache empty, aborting"); + return nullptr; + } + + // Skip over BlockEntry data since it won't be used until EnableLoadedSection + // TODO: Store direct offset to relocations in the header + auto* BlockListStart = CacheFile.data() + sizeof(header); + auto* Cursor = BlockListStart; + for (uint32_t i = 0; i < header.NumBlocks; ++i) { + Cursor += sizeof(uint64_t); // guest address + Cursor += sizeof(uint64_t); // host code address + uint64_t NumGuestCodePages; + ::memcpy(&NumGuestCodePages, Cursor, sizeof(NumGuestCodePages)); + Cursor += sizeof(NumGuestCodePages); + Cursor += NumGuestCodePages * sizeof(uint64_t); + } + + auto Relocations = std::span {reinterpret_cast(Cursor), header.NumRelocations}; + Cursor += Relocations.size_bytes(); + + // Pad to next page to get the code buffer data + Cursor = reinterpret_cast(AlignUp(reinterpret_cast(Cursor), Utils::FEX_PAGE_SIZE)); + auto CodeDataInFile = std::span {Cursor, header.CodeBufferSize}; + + // Make code data inaccessible until finalized + Allocator::VirtualProtect(CodeDataInFile.data(), header.CodeBufferSize, Allocator::ProtectOptions::None); + + // Group relocations by page + size_t NumPages = header.CodeBufferSize / Utils::FEX_PAGE_SIZE; + fextl::vector PageRelocationRanges(NumPages, {0, 0}); + auto RelocBaseOffset = std::as_bytes(Relocations).data() - CacheFile.data(); + auto RelocIt = Relocations.begin(); + for (size_t Page = 0; Page < NumPages; ++Page) { + auto EndRelocIt = std::upper_bound(RelocIt, Relocations.end(), Page, + [](auto& Page, auto& Reloc) { return Page < Reloc.Header.Offset / Utils::FEX_PAGE_SIZE; }); + PageRelocationRanges.at(Page) = {static_cast(RelocBaseOffset + (RelocIt - Relocations.begin()) * sizeof(CPU::Relocation)), + static_cast(EndRelocIt - RelocIt)}; + RelocIt = EndRelocIt; + } + + auto Storage = FEXCore::Allocator::aligned_alloc(alignof(MappedCodeCacheFile), sizeof(MappedCodeCacheFile)); + return fextl::unique_ptr( + new 
(Storage) MappedCodeCacheFile {this, CacheFile, CodeDataInFile, BlockListStart, header.NumBlocks, header.NumCodePages, + std::move(PageRelocationRanges), fextl::vector(NumPages), FileStartVA}); +} + +bool CodeCache::EnableLoadedSection(Core::InternalThreadState* Thread, MappedCodeCacheFile& Code, const ExecutableFileSectionInfo& BinarySection) { + if (!EnableCodeCaching) { + return true; + } + + namespace ranges = std::ranges; + + FEXCORE_PROFILE_SCOPED("EnableLoadedSection"); + + // Read block list from cache file + // TODO: Store section-ized BlockLists in cache file + using BlockListEntry = decltype(GuestToHostMap::BlockList)::value_type; + fextl::vector BlockList(Code.NumBlocks); + { + auto* Cursor = Code.BlockListInFile; + for (auto& BlockPtr : BlockList) { + ::memcpy(&BlockPtr.first, Cursor, sizeof(BlockPtr.first)); + Cursor += sizeof(BlockPtr.first); + ::memcpy(&BlockPtr.second.HostCode, Cursor, sizeof(BlockPtr.second.HostCode)); + Cursor += sizeof(BlockPtr.second.HostCode); + uint64_t NumGuestPages; + ::memcpy(&NumGuestPages, Cursor, sizeof(NumGuestPages)); + Cursor += sizeof(NumGuestPages); + + BlockPtr.second.CodePages.resize(NumGuestPages); + ::memcpy(BlockPtr.second.CodePages.data(), Cursor, std::span {BlockPtr.second.CodePages}.size_bytes()); + Cursor += std::span {BlockPtr.second.CodePages}.size_bytes(); + } + + // Constrain BlockList to the given ExecutableFileSectionInfo + LOGMAN_THROW_A_FMT(ranges::is_sorted(BlockList, [](auto& a, auto& b) { return a.first < b.first; }), "Expected sorted block list"); + auto begin = ranges::lower_bound(BlockList, BinarySection.BeginVA - BinarySection.FileStartVA, std::less {}, &BlockListEntry::first); + auto end = + ranges::upper_bound(begin, BlockList.end(), BinarySection.EndVA - BinarySection.FileStartVA - 1, std::less {}, &BlockListEntry::first); + if (begin == end) { + LogMan::Msg::IFmt("No blocks cached in this range, aborting"); + return true; + } + BlockList.erase(end, BlockList.end()); + 
BlockList.erase(BlockList.begin(), begin); + } + + LogMan::Msg::IFmt("Cache load: {:5} blocks; base={:#14x}; off={:#9x}-{:#09x}; {:016x} {}", BlockList.size(), BinarySection.FileStartVA, + BinarySection.BeginVA - BinarySection.FileStartVA, BinarySection.EndVA - BinarySection.FileStartVA, + BinarySection.FileInfo.FileId, BinarySection.FileInfo.Filename); + + if (EnableLazyCodeCaching) { + LogMan::Msg::IFmt(" lazy mapping: base={:#14x} -> host={}; cache_source={}", BinarySection.FileStartVA, + fmt::ptr(Code.CodeBuffer.data()), fmt::ptr(Code.MappedFile.data())); + } + // Register blocks to LookupCache. + // The host addresses will point into the protected code buffer, so that FEX + // can lazily apply relocations on first execution of each page. + auto CodeBuffer = CTX.GetLatest(); + { + FEXCORE_PROFILE_SCOPED("Decode"); + auto& LookupCache = *CodeBuffer->LookupCache; + auto WriteLock = LookupCache.AcquireWriteLock(); + + for (auto& [Guest, Block] : BlockList) { + for (auto& CodePage : Block.CodePages) { + CodePage += BinarySection.FileStartVA; + } + LOGMAN_THROW_A_FMT(Block.HostCode < Code.CodeBuffer.size_bytes(), "Host offset {:#x} out of range ({:#x})", Block.HostCode, + Code.CodeBuffer.size_bytes()); + auto HostCode = &Code.CodeBuffer[Block.HostCode]; + LookupCache.AddBlockMapping(Guest + BinarySection.FileStartVA, std::move(Block.CodePages), HostCode, WriteLock); + } + + // Guest code pages + auto* Cursor = Code.CodeBuffer.data() + Code.CodeBuffer.size_bytes(); + fextl::vector Entrypoints; + for (uint32_t i = 0; i < Code.NumCodePages; ++i) { + uint64_t CodePage; + memcpy(&CodePage, Cursor, sizeof(CodePage)); + CodePage += BinarySection.FileStartVA; + Cursor += sizeof(CodePage); + + uint64_t NumEntrypoints; + memcpy(&NumEntrypoints, Cursor, sizeof(NumEntrypoints)); + Cursor += sizeof(NumEntrypoints); + + Entrypoints.resize(NumEntrypoints); + memcpy(Entrypoints.data(), Cursor, std::span {Entrypoints}.size_bytes()); + Cursor += std::span {Entrypoints}.size_bytes(); 
+ for (auto& Entrypoint : Entrypoints) { + Entrypoint += BinarySection.FileStartVA; + } + + if (LookupCache.AddBlockExecutableRange(Entrypoints, CodePage, FEXCore::Utils::FEX_PAGE_SIZE, WriteLock)) { + CTX.SyscallHandler->MarkGuestExecutableRange(Thread, CodePage, FEXCore::Utils::FEX_PAGE_SIZE); + } + } + } + + if (!EnableLazyCodeCaching || EnableCodeCacheValidation) { + auto Range = SelectCodeRangeToFinalize(Code, 0, Code.CodeBuffer.size_bytes() / Utils::FEX_PAGE_SIZE); + if (!Range.empty()) { FinalizeCodePages(Code, Range); } // Empty range: every page is already loaded + } + + if (EnableCodeCacheValidation) { + fextl::set GuestBlocks, HostBlocks; + for (auto& [Guest, Host] : BlockList) { + GuestBlocks.insert(Guest + BinarySection.FileStartVA); + HostBlocks.insert(Host.HostCode); + } + + Validate(BinarySection, std::move(GuestBlocks), HostBlocks, Code.CodeBuffer); + } + + return true; +} + +} // namespace FEXCore::Context + +namespace FEXCore { + +static std::span SpanPageRelocations(const MappedCodeCacheFile& Code, size_t PageIndex) { + auto [Offset, Count] = Code.PageRelocationRanges.at(PageIndex); + return std::span {reinterpret_cast(Code.MappedFile.data() + Offset), Count}; +} + +std::span AbstractCodeCache::SelectCodeRangeToFinalize(MappedCodeCacheFile& Code, size_t StartPage, size_t EndPage) { + LOGMAN_THROW_A_FMT(StartPage < EndPage, "Invalid page range [{}, {})", StartPage, EndPage); + LOGMAN_THROW_A_FMT(EndPage <= Code.NumPages(), "End page {} out of range ({})", EndPage, Code.NumPages()); + + // With the range validated, check if we were racing another thread in loading this range + if (std::find(Code.LoadedPages.begin() + StartPage, Code.LoadedPages.begin() + EndPage, false) == Code.LoadedPages.begin() + EndPage) { + return {}; + } + + // Include any pages that have relocations or block link records crossing + // into the current page range. This ensures we don't attempt to finalize + // any page twice, partially apply FEX relocations, or trigger page loads + // during block linking. 
+ while (EndPage < Code.NumPages()) { + auto PageRelocs = SpanPageRelocations(Code, EndPage - 1); + if (!PageRelocs.empty()) { + auto It = std::prev(PageRelocs.end()); + size_t RelocEnd = It->Header.Offset + 16 /* Upper bound for relocation size */; + if (RelocEnd > EndPage * Utils::FEX_PAGE_SIZE) { + ++EndPage; + continue; + } + } + + // Check for trailing block link + { + auto PageRelocs = SpanPageRelocations(Code, EndPage); + if (!PageRelocs.empty() && PageRelocs.begin()->Header.Offset < EndPage * Utils::FEX_PAGE_SIZE + 0x18) { + ++EndPage; + continue; + } + } + break; + }; + while (StartPage != 0) { + auto PageRelocs = SpanPageRelocations(Code, StartPage - 1); + if (!PageRelocs.empty()) { + auto It = std::prev(PageRelocs.end()); + size_t RelocEnd = It->Header.Offset + 16 /* Upper bound for relocation size */; + if (RelocEnd > StartPage * Utils::FEX_PAGE_SIZE) { + --StartPage; + continue; + } + } + + // Check for trailing block link + { + auto PageRelocs = SpanPageRelocations(Code, StartPage); + if (!PageRelocs.empty() && PageRelocs.begin()->Header.Offset < StartPage * Utils::FEX_PAGE_SIZE + 0x18) { + --StartPage; + continue; + } + } + break; + }; + + return Code.CodeBuffer.subspan(StartPage * Utils::FEX_PAGE_SIZE, (EndPage - StartPage) * Utils::FEX_PAGE_SIZE); +} +} // namespace FEXCore + +namespace FEXCore::Context { + +void CodeCache::FinalizeCodePages(MappedCodeCacheFile& Code, std::span CodeRange) { + const size_t StartOffset = CodeRange.data() - Code.CodeBuffer.data(); + const auto StartPage = StartOffset / Utils::FEX_PAGE_SIZE; + const auto EndPage = StartPage + CodeRange.size_bytes() / Utils::FEX_PAGE_SIZE; + + // None of the selected pages should be loaded at all; otherwise, SelectCodeRangeToFinalize returned inconsistent ranges + LOGMAN_THROW_A_FMT(std::find(Code.LoadedPages.begin() + StartPage, Code.LoadedPages.begin() + EndPage, true) == Code.LoadedPages.begin() + EndPage, + "Inconsistent page load state"); + + 
FEXCORE_PROFILE_SCOPED("FinalizeCodePages"); + + // Map writeable to allow applying relocations + // TODO: Are there any remaining race conditions here? + Allocator::VirtualProtect(CodeRange.data(), CodeRange.size_bytes(), Allocator::ProtectOptions::Write); + + // Apply relocations + for (size_t i = StartPage; i < EndPage; ++i) { + auto PageRelocations = SpanPageRelocations(Code, i); + (void)ApplyCodeRelocations(Code.GuestBase, Code.CodeBuffer, PageRelocations, false); + Code.LoadedPages[i] = true; + } + + ARMEmitter::Emitter::ClearICache(CodeRange.data(), CodeRange.size_bytes()); + if (!Allocator::VirtualProtect(CodeRange.data(), CodeRange.size_bytes(), + Allocator::ProtectOptions::Read | Allocator::ProtectOptions::Write | Allocator::ProtectOptions::Exec)) { + ERROR_AND_DIE_FMT("VirtualProtect failed"); + } +} + } // namespace FEXCore::Context diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index 7184827004..813a363fb7 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -456,7 +456,6 @@ void ContextImpl::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread if (Config.StrictInProcessSplitLocks) { FEXCore::Utils::SpinWaitLock::unlock(&StrictSplitLockMutex); } - return; } } diff --git a/FEXCore/Source/Interface/Core/LookupCache.cpp b/FEXCore/Source/Interface/Core/LookupCache.cpp index aaa0c24247..0b230306f0 100644 --- a/FEXCore/Source/Interface/Core/LookupCache.cpp +++ b/FEXCore/Source/Interface/Core/LookupCache.cpp @@ -87,8 +87,11 @@ void LookupCache::ClearL2Cache(const FEXCore::LookupCacheBaseLockToken& lk) { } void LookupCache::ClearThreadLocalCaches(const LookupCacheWriteLockToken&) { + // TODO: Preserve code cache entries? // Clear L1 and L2 by clearing the full cache. 
FEXCore::Allocator::VirtualDontNeed(reinterpret_cast(PagePointer), TotalCacheSize, false); + + // TODO: Rename this member to avoid confusion with code caching CachedCodePages.clear(); } diff --git a/FEXCore/include/FEXCore/Core/CodeCache.h b/FEXCore/include/FEXCore/Core/CodeCache.h index 21d4087210..eaa20a4ac4 100644 --- a/FEXCore/include/FEXCore/Core/CodeCache.h +++ b/FEXCore/include/FEXCore/Core/CodeCache.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -178,7 +179,54 @@ class CodeMapWriter { CodeMapOpener& FileOpener; }; +class AbstractCodeCache; + +/** + * Manages runtime state associated with a mapped code cache file. + * + * The mapped file pointer is managed by the frontend and must be valid + * throughout the lifetime of this object. + */ +struct MappedCodeCacheFile { + // Calls UnregisterMappedCodeBuffer internally, see its docstring about synchronization requirements + ~MappedCodeCacheFile(); + + // If not nullptr, the MappedCodeCacheFile will be unregistered from this on destruction + AbstractCodeCache* CacheManager; + + std::span MappedFile; // Mapped data of the whole cache file + std::span CodeBuffer; // Subspan of cached ARM64 data within MappedFile + std::byte* BlockListInFile; // Pointer to BlockListEntry data within MappedFile + uint32_t NumBlocks; // Number of BlockListEntry objects + uint32_t NumCodePages; // Number of code page entrypoint mappings + + struct PageRelocationRange { + uint32_t Offset; // In bytes from start of file + uint32_t Length; // Number of relocations + }; + + // List of relocation ranges in the mapped cache file, grouped by the code page they apply to. + // This vector is indexed by the relative page offset from the start of the ARM64 code data. 
+ // + // For example PageRelocationRanges[1] == { 0x100, 0x20 } means: + // - there are 0x20 relocations stored at byte offset 0x100 in the cache file + // - these 0x20 relocations will patch data at CodeBuffer[0x1000..0x2000] + fextl::vector PageRelocationRanges; + fextl::vector LoadedPages; + + uint64_t GuestBase {}; // Guest base address for relocation application + + // Helper member to prevent moving/copying without disallowing aggregate-construction + std::atomic disallow_copy_or_move; + + size_t NumPages() const { + return CodeBuffer.size_bytes() / FEXCore::Utils::FEX_PAGE_SIZE; + } +}; + class AbstractCodeCache { + fextl::vector> MappedCodeBuffers; + public: virtual ~AbstractCodeCache() = default; @@ -190,13 +238,6 @@ class AbstractCodeCache { */ virtual uint64_t ComputeCodeMapId(std::string_view Filename, int FD) = 0; - /** - * Loads a code cache from mapped memory and appends it to the current Core state. - * TODO: Optionally recompiles all contained code blocks at runtime for validation. - * Returns false if the provided cache file is invalid, and true otherwise. - */ - virtual bool LoadData(Core::InternalThreadState*, std::byte* MappedCacheFile, const ExecutableFileSectionInfo&) = 0; - /** * Bundles the current Core state (CodeBuffer, GuestToHostMapping, ...) to a code cache and writes it to the given file descriptor. * Returns true on success. @@ -207,6 +248,42 @@ class AbstractCodeCache { * Function to be called before compiling any code for caching purposes */ virtual void InitiateCacheGeneration() = 0; + + /** + * Loads a code cache from mapped memory. + * + * Code sections must be enabled in a second step (see EnableLoadedSection). + * Afterwards, individual code pages must be finalized using FinalizeCodePages. + * + * On success, this returns a MappedCodeCacheFile that must be kept alive + * as long as the cache is in use. 
+ */ + virtual fextl::unique_ptr LoadCache(std::span CacheFile, const ExecutableFileInfo&, uint64_t FileStartVA) = 0; + + /** + * Registers cached blocks for the given file section to the LookupCache. + * + * Also runs extended cache validation if enabled. + */ + virtual bool EnableLoadedSection(Core::InternalThreadState*, MappedCodeCacheFile&, const ExecutableFileSectionInfo&) = 0; + + /** + * Extend the given code range so that it can be safely finalized. + * + * This is required for example to avoid dangling page-crossing FEX relocations on the edges. + * + * StartPage and EndPage are 0-based relative page offsets into the cached code. + */ + static std::span SelectCodeRangeToFinalize(MappedCodeCacheFile&, size_t StartPage, size_t EndPage); + + /** + * Finalize code pages in the given range (see SelectCodeRangeToFinalize) for execution. + */ + virtual void FinalizeCodePages(MappedCodeCacheFile&, std::span CodeRange) = 0; + + void RegisterMappedCodeBuffer(MappedCodeCacheFile&); + void UnregisterMappedCodeBuffer(MappedCodeCacheFile&); + bool IsAddressInMappedCodeBuffer(uintptr_t Address) const; }; } // namespace FEXCore diff --git a/FEXCore/include/FEXCore/Utils/SignalScopeGuards.h b/FEXCore/include/FEXCore/Utils/SignalScopeGuards.h index d4ceeab89d..6589cf2d30 100644 --- a/FEXCore/include/FEXCore/Utils/SignalScopeGuards.h +++ b/FEXCore/include/FEXCore/Utils/SignalScopeGuards.h @@ -35,6 +35,9 @@ class ForkableUniqueMutex final { const auto Result = pthread_mutex_lock(&Mutex); LOGMAN_THROW_A_FMT(Result == 0, "{} failed to lock with {}", __func__, Result); } + bool try_lock() { + return pthread_mutex_trylock(&Mutex) == 0; + } void unlock() { const auto Result = pthread_mutex_unlock(&Mutex); LOGMAN_THROW_A_FMT(Result == 0, "{} failed to unlock with {}", __func__, Result); diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp index bf6a5a0fec..b726033e94 100644 --- 
a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp @@ -899,9 +899,19 @@ uint64_t UnimplementedSyscallSafe(FEXCore::Core::CpuStateFrame* Frame, uint64_t } void SyscallHandler::LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) { - TM.LockBeforeFork(); - Thread->CTX->LockBeforeFork(Thread); - VMATracking.Mutex.lock(); + while (true) { + TM.LockBeforeFork(); + Thread->CTX->LockBeforeFork(Thread); + if (std::try_lock(CodeCachePatchingMutex, VMATracking.Mutex) == -1) { + break; + } + + // Lock failed: Another thread has temporarily acquired these mutexes. + // Release the locks taken above to avoid a deadlock and retry later + CTX->UnlockAfterFork(Thread, false); + TM.UnlockAfterFork(Thread, false); + std::this_thread::sleep_for(std::chrono::milliseconds {10}); + }; } void SyscallHandler::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) { @@ -910,8 +920,10 @@ void SyscallHandler::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThr FM.SetProtectedCodeMapFD(-1); VMATracking.Mutex.StealAndDropActiveLocks(); + CodeCachePatchingMutex.StealAndDropActiveLocks(); } else { VMATracking.Mutex.unlock(); + CodeCachePatchingMutex.unlock(); } CTX->UnlockAfterFork(LiveThread, Child); diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h index 9810099ef2..6b7a30602b 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h @@ -265,7 +265,7 @@ class SyscallHandler : public FEXCore::HLE::SyscallHandler, if (SMCChecks != FEXCore::Config::CONFIG_SMC_NONE) { TM.InvalidateGuestCodeRange(Thread, Base, Length); } - if (CheckPendingVMAResources) { + if (CheckPendingVMAResources && Thread) { auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread); VMATracking.FlushPendingResourceDeletions(); } @@ -363,6 +363,9 @@ class SyscallHandler : public 
FEXCore::HLE::SyscallHandler, std::mutex FutexMutex; std::mutex SyscallMutex; + // std::mutex CodeCachePatchingMutex; + FEXCore::ForkableUniqueMutex CodeCachePatchingMutex; + FEX::CodeLoader* LocalLoader {}; bool NeedToCheckXID {true}; diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp index 2289281886..69fe6a163c 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp @@ -30,7 +30,21 @@ desc: SMC/MMan Tracking #include namespace FEX::HLE { -// SMC interactions +static void HandleSegfaultForCodeCacheFinalization(FEXCore::Core::InternalThreadState& Thread, FEXCore::MappedCodeCacheFile& Code, + uintptr_t FaultAddress) { + FEXCORE_PROFILE_SCOPED("Load code cache page"); + size_t PageIdx = (reinterpret_cast(FaultAddress) - Code.CodeBuffer.data()) / FEXCore::Utils::FEX_PAGE_SIZE; + + auto RangeToFinalize = Thread.CTX->GetCodeCache().SelectCodeRangeToFinalize(Code, PageIdx, PageIdx + 1); + if (!RangeToFinalize.empty()) { + Thread.CTX->GetCodeCache().FinalizeCodePages(Code, RangeToFinalize); + } +} + +// Handles segfaults from: +// - call-ret shadow stack overflow +// - guest-side self-modifying code (SMC) +// - lazy loading of mapped code cache pages bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) { const auto FaultAddress = (uintptr_t)((siginfo_t*)info)->si_addr; @@ -46,13 +60,25 @@ bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState* Thread, // Can't use the deferred signal lock in the SIGSEGV handler. 
auto lk = FEXCore::MaskSignalsAndLockMutex(_SyscallHandler->VMATracking.Mutex); - auto VMATracking = &_SyscallHandler->VMATracking; + auto& VMATracking = _SyscallHandler->VMATracking; // If the write spans two pages, they will be flushed one at a time (generating two faults) - auto Entry = VMATracking->FindVMAEntry(FaultAddress); + auto Entry = VMATracking.FindVMAEntry(FaultAddress); + + if (Entry == VMATracking.VMAs.end()) { + // Not a guest page; check mapped code cache pages + auto* Code = VMATracking.FindMappedCodeCacheByHostAddress(FaultAddress); + if (!Code) { + // Untracked address; not handled here + return false; + } + std::lock_guard lk(_SyscallHandler->CodeCachePatchingMutex); + HandleSegfaultForCodeCacheFinalization(*Thread, *Code, FaultAddress); + return true; + } - // If an untracked address, or the mapping wasn't writable, it can't be handled here - if (Entry == VMATracking->VMAs.end() || !Entry->second.Prot.Writable) { + // If the mapping wasn't writable, it can't be handled here + if (!Entry->second.Prot.Writable) { return false; } @@ -233,30 +259,44 @@ static ReadELFHeadersResult ReadELFHeaders(int FD, std::span HeaderDa return ReadELFHeadersResult {std::move(Parser.phdrs), std::move(Relocations), HasCodeRelocations}; } -static void LoadCodeCache(FEXCore::Core::InternalThreadState& Thread, FEXCore::ExecutableFileSectionInfo& Section, uint64_t CodeCacheConfigId) { +static fextl::unique_ptr +LoadCodeCache(FEXCore::Core::InternalThreadState& Thread, VMATracking::VMATracking& VMATracking, + const FEXCore::ExecutableFileInfo& FileInfo, uint64_t CodeCacheConfigId, uint64_t FileStartVA) { + auto& CodeCache = Thread.CTX->GetCodeCache(); + auto CacheFilename = fextl::fmt::format("{}cache/{}-{:016x}", FEX::Config::GetCacheDirectory(), - FEXCore::CodeMap::GetBaseFilename(Section.FileInfo, false), CodeCacheConfigId); + FEXCore::CodeMap::GetBaseFilename(FileInfo, false), CodeCacheConfigId); int CacheFD = open(CacheFilename.c_str(), O_RDONLY); if (CacheFD == 
-1) { LogMan::Msg::IFmt("Cache file does not exist: {}", CacheFilename); - return; + return nullptr; } struct stat buf; if (fstat(CacheFD, &buf) != 0) { LogMan::Msg::EFmt("Invalid cache file: {}", CacheFilename); close(CacheFD); - return; + return nullptr; } - auto CacheFileSize = buf.st_size; - auto MappedCache = (std::byte*)FEXCore::Allocator::mmap(nullptr, CacheFileSize, PROT_READ, MAP_PRIVATE, CacheFD, 0); - LOGMAN_THROW_A_FMT(MappedCache, "Failed to map code cache into memory"); - if (!Thread.CTX->GetCodeCache().LoadData(&Thread, MappedCache, Section)) { - // TODO: Delete this cache file - } - FEXCore::Allocator::munmap(MappedCache, CacheFileSize); + auto CacheFileSize = static_cast(buf.st_size); + auto MappedCache = (std::byte*)FEXCore::Allocator::mmap(nullptr, CacheFileSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, CacheFD, 0); close(CacheFD); + if (!MappedCache || MappedCache == MAP_FAILED) { + LogMan::Msg::EFmt("Failed to map code cache into memory"); + return nullptr; + } + + auto Result = CodeCache.LoadCache(std::span {MappedCache, CacheFileSize}, FileInfo, FileStartVA); + if (!Result) { + FEXCore::Allocator::munmap(MappedCache, CacheFileSize); + return nullptr; + } + + // NOTE: This is synchronized by acquiring VMATracking.Mutex at call site + CodeCache.RegisterMappedCodeBuffer(*Result); + + return Result; } void* SyscallHandler::GuestMmap(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, @@ -302,7 +342,8 @@ void* SyscallHandler::GuestMmap(bool Is64Bit, FEXCore::Core::InternalThreadState } if (EnableCodeCaching && CachedSection) { - LoadCodeCache(*Thread, *CachedSection, CodeCacheConfigId); + Thread->CTX->GetCodeCache().EnableLoadedSection( + Thread, *static_cast(CachedSection->FileInfo).MappedCache, *CachedSection); } return reinterpret_cast(Result); @@ -386,7 +427,7 @@ void SyscallHandler::TriggerGuestLibWrapperCodeCacheLoad(FEXCore::Core::Internal } auto SectionInfo = 
BuildSectionInfo(*VMAEntry->second.Resource, VMA->Base, VMA->Length); - LoadCodeCache(Thread, SectionInfo, CodeCacheConfigId); + LoadCodeCache(Thread, VMATracking, SectionInfo.FileInfo, CodeCacheConfigId, SectionInfo.FileStartVA); } } @@ -461,9 +502,12 @@ uint64_t SyscallHandler::GuestMprotect(FEXCore::Core::InternalThreadState* Threa } // Trigger delayed cache load. This must be done separately since - // LoadCodeCache will call interfaces that acquire the VMATracking mutex. + // EnableLoadedSection will call interfaces that acquire the VMATracking mutex. for (auto& CachedSection : CachedSections) { - LoadCodeCache(*Thread, CachedSection, CodeCacheConfigId); + auto Cache = static_cast(CachedSection.FileInfo).MappedCache.get(); + if (Cache) { + Thread->CTX->GetCodeCache().EnableLoadedSection(Thread, *Cache, CachedSection); + } } return Result; @@ -565,7 +609,7 @@ SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState* Thread, uint64_t a if (PathLength != -1 && S_ISREG(buf.st_mode) && (buf.st_mode & S_IXUSR)) { // ELF files that are mapped multiple times get a separate MappedResource for each base virtual address if ((prot & PROT_READ) && Inserted) { - Resource->MappedFile = fextl::make_unique(); + Resource->MappedFile = fextl::make_unique(); Resource->MappedFile->Filename = fextl::string(Tmp, PathLength); Resource->MappedFile->FileId = CTX->GetCodeCache().ComputeCodeMapId(Resource->MappedFile->Filename, fd); @@ -651,7 +695,14 @@ SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState* Thread, uint64_t a // Load code cache if present. // FEXServer was requested to generate library caches on program launch. 
if (EnableCodeCaching && Resource && Resource->MappedFile && VMATracking::VMAProt::fromProt(prot).Executable) { - if (Resource->MappedFile->Filename.ends_with("-guest.so")) { + if (!Resource->MappedFile->AttemptedCacheLoad) { + Resource->MappedFile->MappedCache = LoadCodeCache(*Thread, VMATracking, *Resource->MappedFile, CodeCacheConfigId, Resource->FirstVMA->Base); + Resource->MappedFile->AttemptedCacheLoad = true; + } + + if (!Resource->MappedFile->MappedCache) { + // No cache present + } else if (Resource->MappedFile->Filename.ends_with("-guest.so")) { // For guest library wrappers, cache loading must be delayed until LoadLib is called. // Before that, we can't patch up the SHA256 function identifiers. LogMan::Msg::IFmt("Delaying code cache load for {}", Resource->MappedFile->Filename); diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp index ad5146cde0..62490bfdc1 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp @@ -7,10 +7,20 @@ desc: VMA Tracking $end_info$ */ +#include "FEXCore/Utils/MathUtils.h" #include "LinuxSyscalls/Syscalls.h" #include namespace FEX::HLE::VMATracking { + +ExecutableFileState::~ExecutableFileState() { + if (MappedCache && MappedCache->MappedFile.data()) { + auto ret = FEXCore::Allocator::munmap(MappedCache->MappedFile.data(), + FEXCore::AlignUp(MappedCache->MappedFile.size_bytes(), FEXCore::Utils::FEX_PAGE_SIZE)); + LOGMAN_THROW_A_FMT(ret == 0, "Error unmapping cache for {}: {} {}", Filename, errno, strerror(errno)); + } +} + /// Helpers /// auto VMAProt::fromProt(int Prot) -> VMAProt { return VMAProt { @@ -238,7 +248,7 @@ void VMATracking::DeleteVMARange(FEXCore::Context::Context* CTX, uintptr_t Base, auto Iter = Current->Resource->Iterator; // Defer deletion if the resource has mapped code cache data, so its code buffer // outlives code 
cache invalidation (which runs after the VMA lock is released). - if (false) { // TODO: Consider unconditionally deferring deletion? + if (Current->Resource->MappedFile && Current->Resource->MappedFile->MappedCache) { PendingResourceDeletions.push_back(std::move(*Current->Resource)); } MappedResources.erase(Iter); @@ -554,7 +564,7 @@ uintptr_t VMATracking::DeleteSHMRegion(FEXCore::Context::Context* CTX, uintptr_t if (Entry->second.Resource == Resource) { if (ListRemove(&Entry->second)) { auto Iter = Entry->second.Resource->Iterator; - if (false) { // TODO: Consider unconditionally deferring deletion? + if (Entry->second.Resource->MappedFile && Entry->second.Resource->MappedFile->MappedCache) { PendingResourceDeletions.push_back(std::move(*Entry->second.Resource)); } MappedResources.erase(Iter); @@ -567,4 +577,18 @@ uintptr_t VMATracking::DeleteSHMRegion(FEXCore::Context::Context* CTX, uintptr_t return ShmLength; } + +FEXCore::MappedCodeCacheFile* VMATracking::FindMappedCodeCacheByHostAddress(uintptr_t HostAddr) const { + for (auto& [_, Resource] : MappedResources) { + if (Resource.MappedFile && Resource.MappedFile->MappedCache) { + auto* Code = Resource.MappedFile->MappedCache.get(); + auto BufferStart = reinterpret_cast(Code->CodeBuffer.data()); + if (HostAddr >= BufferStart && HostAddr < BufferStart + Code->CodeBuffer.size_bytes()) { + return Code; + } + } + } + return nullptr; +} + } // namespace FEX::HLE::VMATracking diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h index e6f7f826b9..8998fdb840 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h @@ -4,12 +4,18 @@ #include #include +#include "FEXCore/Core/CodeCache.h" #include #include #include #include +namespace FEXCore { +struct ExecutableFileInfo; +struct MappedCodeCacheFile; +} // namespace FEXCore + namespace 
FEX::HLE::VMATracking { ///// VMA (Virtual Memory Area) tracking ///// @@ -32,6 +38,13 @@ struct MRID { struct VMAEntry; +struct ExecutableFileState : FEXCore::ExecutableFileInfo { + ~ExecutableFileState(); + + bool AttemptedCacheLoad = false; + fextl::unique_ptr MappedCache; +}; + /** * Meta data associated to one system resource. * @@ -43,7 +56,7 @@ struct VMAEntry; struct MappedResource { using ContainerType = fextl::multimap; - fextl::unique_ptr MappedFile; + fextl::unique_ptr MappedFile; // Pointer to lowest memory range this file is mapped to VMAEntry* FirstVMA; uint64_t Length; // 0 if not fixed size @@ -136,6 +149,10 @@ struct VMATracking { return MappedResources.equal_range(mrid); } + // Find any MappedCodeCacheFile that contains the given host code address. + // - Mutex must be shared_locked before calling + FEXCore::MappedCodeCacheFile* FindMappedCodeCacheByHostAddress(uintptr_t HostAddr) const; + bool HasPendingResourceDeletions() const { return !PendingResourceDeletions.empty(); } diff --git a/Source/Windows/Common/ImageTracker.cpp b/Source/Windows/Common/ImageTracker.cpp index 01d5fa90c8..972d561a05 100644 --- a/Source/Windows/Common/ImageTracker.cpp +++ b/Source/Windows/Common/ImageTracker.cpp @@ -172,7 +172,7 @@ FEXCore::ExecutableFileSectionInfo ImageTracker::HandleImageMap(std::string_view auto AOTImage = AOTImages.find(ID); if (AOTImage != AOTImages.end()) { - CTX.GetCodeCache().LoadData(nullptr, AOTImage->second.Data, ImageInfo->SectionInfo); + // TODO: CodeCache::EnableLoadedSection } } @@ -277,6 +277,7 @@ void ImageTracker::LoadAOTImages(MappedImageInfo& ImageInfo) { RtlUnicodeToMultiByteN(UniqueId.data(), AnsiLength, NULL, Info->FileName, Info->FileNameLength); AOTImages[UniqueId] = {.Data = static_cast(LoadAddress)}; + // TODO: CodeCache::LoadCache, CodeCache::RegisterMappedCodeBuffer LogMan::Msg::IFmt("Loaded cache: {}", UniqueId); } } From a5df5f95dd686debc4bc979b275429edaa72ebb3 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 13 
May 2026 16:39:05 +0200 Subject: [PATCH 07/12] CodeCache: Ensure atomicity of code page finalization --- FEXCore/Source/Interface/Context/Context.h | 4 +- FEXCore/Source/Interface/Core/CodeCache.cpp | 95 +++++++++++++++---- FEXCore/include/FEXCore/Core/CodeCache.h | 11 ++- .../LinuxSyscalls/SyscallsSMCTracking.cpp | 2 +- 4 files changed, 89 insertions(+), 23 deletions(-) diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index b3868ca224..9bc21d8d5e 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -118,12 +118,14 @@ class CodeCache : public AbstractCodeCache { * Note that FEX relocations are unrelated to ELF/PE relocations. * * @param GuestDelta Guest address offset to apply to RIP-relative data + * @param RelocationOffset Offset to subtract from relocation target offsets * @param ForStorage True for serializing data (producing deterministic output); false for de-serializing it (resolving dynamic symbols) * * @return Returns true on success */ [[nodiscard]] - bool ApplyCodeRelocations(uint64_t GuestDelta, std::span Code, std::span Relocations, bool ForStorage); + bool ApplyCodeRelocations(uint64_t GuestDelta, std::span Code, std::span Relocations, + uint32_t RelocationOffset, bool ForStorage); }; class ContextImpl final : public FEXCore::Context::Context, public CPU::CodeBufferManager { diff --git a/FEXCore/Source/Interface/Core/CodeCache.cpp b/FEXCore/Source/Interface/Core/CodeCache.cpp index 9d23b311f5..f7c47e122e 100644 --- a/FEXCore/Source/Interface/Core/CodeCache.cpp +++ b/FEXCore/Source/Interface/Core/CodeCache.cpp @@ -45,6 +45,12 @@ MappedCodeCacheFile::~MappedCodeCacheFile() { if (CacheManager) { CacheManager->UnregisterMappedCodeBuffer(*this); } + +#ifndef _WIN32 + if (!CodeBuffer.empty()) { + FEXCore::Allocator::munmap(CodeBuffer.data(), CodeBuffer.size_bytes()); + } +#endif } void AbstractCodeCache::RegisterMappedCodeBuffer(MappedCodeCacheFile& 
Code) { @@ -346,7 +352,7 @@ bool CodeCache::SaveData(Core::InternalThreadState& Thread, int fd, const Execut // Dump the host code (relocated for position-independent serialization) std::span CodeBufferData(reinterpret_cast(CodeBuffer->Ptr), reinterpret_cast(CodeBuffer->Ptr) + CTX.LatestOffset); - if (!ApplyCodeRelocations(SerializedBaseAddress, CodeBufferData, Relocations, true)) { + if (!ApplyCodeRelocations(SerializedBaseAddress, CodeBufferData, Relocations, 0, true)) { LOGMAN_THROW_A_FMT(false, "Failed to apply code relocations"); return false; } @@ -428,7 +434,7 @@ void CodeCache::Validate(const ExecutableFileSectionInfo& Section, fextl::setLatestOffset <= CodeBufferRangeRef.size()) { // Reference compilation produced fewer bytes than our cache, so validation is going to fail. @@ -490,11 +496,13 @@ void CodeCache::Validate(const ExecutableFileSectionInfo& Section, fextl::set Code, - std::span EntryRelocations, bool ForStorage) { + std::span EntryRelocations, uint32_t RelocationOffset, bool ForStorage) { CPU::Arm64Emitter Emitter(&CTX, Code.data(), Code.size_bytes()); for (size_t j = 0; j < EntryRelocations.size(); ++j) { const FEXCore::CPU::Relocation& Reloc = EntryRelocations[j]; - Emitter.SetCursorOffset(Reloc.Header.Offset); + LOGMAN_THROW_A_FMT(Reloc.Header.Offset >= RelocationOffset, "Invalid relocation offset"); + LOGMAN_THROW_A_FMT(Reloc.Header.Offset - RelocationOffset < Code.size_bytes(), "Invalid relocation offset"); + Emitter.SetCursorOffset(Reloc.Header.Offset - RelocationOffset); switch (Reloc.Header.Type) { case FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL: { @@ -582,8 +590,20 @@ CodeCache::LoadCache(std::span CacheFile, const ExecutableFileInfo& F Cursor = reinterpret_cast(AlignUp(reinterpret_cast(Cursor), Utils::FEX_PAGE_SIZE)); auto CodeDataInFile = std::span {Cursor, header.CodeBufferSize}; - // Make code data inaccessible until finalized - Allocator::VirtualProtect(CodeDataInFile.data(), header.CodeBufferSize, 
Allocator::ProtectOptions::None); +#ifndef _WIN32 + // Allocate target memory for post-relocation code. This is PROT_NONE until + // the first execution, so that contents can be lazily populated in a + // frontend-provided segfault handler. + void* CodeBufferAllocation = Allocator::mmap(nullptr, header.CodeBufferSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (CodeBufferAllocation == MAP_FAILED) { + LogMan::Msg::EFmt("Failed to reserve target memory for code cache"); + return nullptr; + } + auto CodeBuffer = std::span {static_cast(CodeBufferAllocation), header.CodeBufferSize}; +#else + // TODO: Implement lazy mapping on Windows + auto CodeBuffer = CodeDataInFile; +#endif // Group relocations by page size_t NumPages = header.CodeBufferSize / Utils::FEX_PAGE_SIZE; @@ -600,7 +620,7 @@ CodeCache::LoadCache(std::span CacheFile, const ExecutableFileInfo& F auto Storage = FEXCore::Allocator::aligned_alloc(alignof(MappedCodeCacheFile), sizeof(MappedCodeCacheFile)); return fextl::unique_ptr( - new (Storage) MappedCodeCacheFile {this, CacheFile, CodeDataInFile, BlockListStart, header.NumBlocks, header.NumCodePages, + new (Storage) MappedCodeCacheFile {this, CacheFile, CodeDataInFile, CodeBuffer, BlockListStart, header.NumBlocks, header.NumCodePages, std::move(PageRelocationRanges), fextl::vector(NumPages), FileStartVA}); } @@ -674,7 +694,7 @@ bool CodeCache::EnableLoadedSection(Core::InternalThreadState* Thread, MappedCod } // Guest code pages - auto* Cursor = Code.CodeBuffer.data() + Code.CodeBuffer.size_bytes(); + auto* Cursor = Code.CodeBufferInFile.data() + Code.CodeBufferInFile.size_bytes(); fextl::vector Entrypoints; for (uint32_t i = 0; i < Code.NumCodePages; ++i) { uint64_t CodePage; @@ -699,7 +719,12 @@ bool CodeCache::EnableLoadedSection(Core::InternalThreadState* Thread, MappedCod } } +#ifndef _WIN32 if (!EnableLazyCodeCaching || EnableCodeCacheValidation) { +#else + // TODO: Implement lazy mapping on Windows + if (true) { +#endif auto Range = 
SelectCodeRangeToFinalize(Code, 0, Code.CodeBuffer.size_bytes() / Utils::FEX_PAGE_SIZE); FinalizeCodePages(Code, Range); } @@ -792,6 +817,7 @@ void CodeCache::FinalizeCodePages(MappedCodeCacheFile& Code, std::span(Allocator::VirtualAlloc(nullptr, Size, true)); + if (!Staging) { + ERROR_AND_DIE_FMT("Failed to allocate {} bytes of staging memory for code-cache finalization", Size); + } + + // Copy code from the cache file to the staging buffer + memcpy(Staging, Code.CodeBufferInFile.data() + StartOffset, Size); // Apply relocations + auto StagingSpan = std::span {Staging, Size}; for (size_t i = StartPage; i < EndPage; ++i) { auto PageRelocations = SpanPageRelocations(Code, i); - (void)ApplyCodeRelocations(Code.GuestBase, Code.CodeBuffer, PageRelocations, false); + (void)ApplyCodeRelocations(Code.GuestBase, StagingSpan, PageRelocations, static_cast(StartOffset), false); Code.LoadedPages[i] = true; } - ARMEmitter::Emitter::ClearICache(CodeRange.data(), CodeRange.size_bytes()); - if (!Allocator::VirtualProtect(CodeRange.data(), CodeRange.size_bytes(), - Allocator::ProtectOptions::Read | Allocator::ProtectOptions::Write | Allocator::ProtectOptions::Exec)) { - ERROR_AND_DIE_FMT("VirtualProtect failed"); + // Atomically make the finalized code data visible by remapping the staging + // buffer onto the requested CodeBuffer window. MREMAP_DONTUNMAP is used to + // leave the old VA range reserved so that we can cleanly deallocate it + // through Allocator. + void* RemapResult = ::mremap(Staging, Size, Size, MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, CodeRange.data()); + if (RemapResult == MAP_FAILED) { + ERROR_AND_DIE_FMT("{}: mremap failed: {}", __FUNCTION__, errno); } + Allocator::VirtualFree(Staging, Size); + + // Release resident file pages that will no longer be needed. The VA range is left allocated to allow cleanup with a single VirtualFree. 
+ Allocator::VirtualDontNeed(Code.CodeBufferInFile.data() + StartOffset, Size); +#else + // TODO: Implement lazy mapping on Windows + for (size_t i = StartPage; i < EndPage; ++i) { + auto PageRelocations = SpanPageRelocations(Code, i); + (void)ApplyCodeRelocations(Code.GuestBase, Code.CodeBuffer, PageRelocations, 0, false); + Code.LoadedPages[i] = true; + } +#endif + + ARMEmitter::Emitter::ClearICache(CodeRange.data(), Size); } } // namespace FEXCore::Context diff --git a/FEXCore/include/FEXCore/Core/CodeCache.h b/FEXCore/include/FEXCore/Core/CodeCache.h index eaa20a4ac4..bd99d4bd2e 100644 --- a/FEXCore/include/FEXCore/Core/CodeCache.h +++ b/FEXCore/include/FEXCore/Core/CodeCache.h @@ -194,11 +194,12 @@ struct MappedCodeCacheFile { // If not nullptr, the MappedCodeCacheFile will be unregistered from this on destruction AbstractCodeCache* CacheManager; - std::span MappedFile; // Mapped data of the whole cache file - std::span CodeBuffer; // Subspan of cached ARM64 data within MappedFile - std::byte* BlockListInFile; // Pointer to BlockListEntry data within MappedFile - uint32_t NumBlocks; // Number of BlockListEntry objects - uint32_t NumCodePages; // Number of code page entrypoint mappings + std::span MappedFile; // Mapped data of the whole cache file + std::span CodeBufferInFile; // Subspan of cached ARM64 data within MappedFile (pre-relocation) + std::span CodeBuffer; // Cached ARM64 data used for execution (post-relocation; owned by MappedCodeCacheFile) + std::byte* BlockListInFile; // Pointer to BlockListEntry data within MappedFile + uint32_t NumBlocks; // Number of BlockListEntry objects + uint32_t NumCodePages; // Number of code page entrypoint mappings struct PageRelocationRange { uint32_t Offset; // In bytes from start of file diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp index 69fe6a163c..12ad907726 100644 --- 
a/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp @@ -280,7 +280,7 @@ LoadCodeCache(FEXCore::Core::InternalThreadState& Thread, VMATracking::VMATracki } auto CacheFileSize = static_cast(buf.st_size); - auto MappedCache = (std::byte*)FEXCore::Allocator::mmap(nullptr, CacheFileSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, CacheFD, 0); + auto MappedCache = (std::byte*)FEXCore::Allocator::mmap(nullptr, CacheFileSize, PROT_READ, MAP_PRIVATE, CacheFD, 0); close(CacheFD); if (!MappedCache || MappedCache == MAP_FAILED) { LogMan::Msg::EFmt("Failed to map code cache into memory"); From a1124f53cfde18ec4f85212edee992084fc67d26 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 22:28:14 +0100 Subject: [PATCH 08/12] Windows/ImageTracker: Wire up LoadCache and EnableLoadedSection The lazy code loading refactor replaced LoadData with the new LoadCache/EnableLoadedSection API but left the Windows path as TODOs. Implement the wiring: LoadAOTImages now calls LoadCache + RegisterMappedCodeBuffer for each mapped cache file, and HandleImageMap calls EnableLoadedSection (with nullptr thread since lazy mapping is not yet implemented on Windows). 
--- Source/Windows/Common/ImageTracker.cpp | 17 ++++++++++++----- Source/Windows/Common/ImageTracker.h | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Source/Windows/Common/ImageTracker.cpp b/Source/Windows/Common/ImageTracker.cpp index 972d561a05..c9652f4cb0 100644 --- a/Source/Windows/Common/ImageTracker.cpp +++ b/Source/Windows/Common/ImageTracker.cpp @@ -171,8 +171,8 @@ FEXCore::ExecutableFileSectionInfo ImageTracker::HandleImageMap(std::string_view } auto AOTImage = AOTImages.find(ID); - if (AOTImage != AOTImages.end()) { - // TODO: CodeCache::EnableLoadedSection + if (AOTImage != AOTImages.end() && AOTImage->second.CacheFile) { + CTX.GetCodeCache().EnableLoadedSection(nullptr, *AOTImage->second.CacheFile, ImageInfo->SectionInfo); } } @@ -276,9 +276,16 @@ void ImageTracker::LoadAOTImages(MappedImageInfo& ImageInfo) { UniqueId.resize(AnsiLength); RtlUnicodeToMultiByteN(UniqueId.data(), AnsiLength, NULL, Info->FileName, Info->FileNameLength); - AOTImages[UniqueId] = {.Data = static_cast(LoadAddress)}; - // TODO: CodeCache::LoadCache, CodeCache::RegisterMappedCodeBuffer - LogMan::Msg::IFmt("Loaded cache: {}", UniqueId); + auto CacheSpan = std::span {static_cast(LoadAddress), MappedSize}; + auto MappedCache = CTX.GetCodeCache().LoadCache(CacheSpan, ImageInfo.Info, ImageInfo.SectionInfo.FileStartVA); + if (MappedCache) { + CTX.GetCodeCache().RegisterMappedCodeBuffer(*MappedCache); + AOTImages[UniqueId] = {.CacheFile = std::move(MappedCache)}; + LogMan::Msg::IFmt("Loaded cache: {}", UniqueId); + } else { + NtUnmapViewOfSection(NtCurrentProcess(), LoadAddress); + LogMan::Msg::EFmt("Failed to load cache: {}", UniqueId); + } } } } diff --git a/Source/Windows/Common/ImageTracker.h b/Source/Windows/Common/ImageTracker.h index dcbc06d00a..438bcbc6f8 100644 --- a/Source/Windows/Common/ImageTracker.h +++ b/Source/Windows/Common/ImageTracker.h @@ -56,7 +56,7 @@ class ImageTracker : public FEXCore::CodeMapOpener { }; struct AOTImageInfo { - std::byte* 
Data; + fextl::unique_ptr CacheFile; }; void LoadAOTImages(MappedImageInfo& Info); From 769e3e32af85e47b8dec0c197e1022fc0fde7115 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 20:36:34 +0100 Subject: [PATCH 09/12] FEX: Switch serialization to use mmap instead of write A significant proportion of time with the prior approach was spent in the unbuffered write calls used once per code page/entrypoint; taking upwards of 15 seconds to serialize a 700MB P3R cache. Switching to an mmap approach reduces the CPU time there to less than a second, leaving I/O to dominate. A two-pass approach is used: 1. Calculate the required size for the cache file, issue a callback to frontend to map a cache file of that size. 2. Write cache data into the buffer provided by that callback. --- FEXCore/Source/Interface/Context/Context.h | 3 +- FEXCore/Source/Interface/Core/CodeCache.cpp | 161 ++++++++++++-------- FEXCore/include/FEXCore/Core/CodeCache.h | 7 +- Source/Tools/FEXOfflineCompiler/Main.cpp | 28 +++- 4 files changed, 133 insertions(+), 66 deletions(-) diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index 9bc21d8d5e..e8ac82c024 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -80,7 +80,8 @@ class CodeCache : public AbstractCodeCache { FEX_CONFIG_OPT(EnableCodeCacheValidation, ENABLECODECACHEVALIDATION); uint64_t ComputeCodeMapId(std::string_view Filename, int FD) override; - bool SaveData(Core::InternalThreadState&, int TargetFD, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress) override; + bool SaveData(Core::InternalThreadState&, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress, + std::function MapFile) override; fextl::unique_ptr LoadCache(std::span CacheFile, const ExecutableFileInfo&, uint64_t FileStartVA) override; diff --git a/FEXCore/Source/Interface/Core/CodeCache.cpp b/FEXCore/Source/Interface/Core/CodeCache.cpp 
index f7c47e122e..2590c66d22 100644 --- a/FEXCore/Source/Interface/Core/CodeCache.cpp +++ b/FEXCore/Source/Interface/Core/CodeCache.cpp @@ -293,93 +293,134 @@ struct CodeCacheHeader { template concept OrderedContainer = requires { typename T::key_compare; }; -bool CodeCache::SaveData(Core::InternalThreadState& Thread, int fd, const ExecutableFileSectionInfo& SourceBinary, uint64_t SerializedBaseAddress) { - auto CodeBuffer = CTX.GetLatest(); - auto& LookupCache = *Thread.LookupCache->Shared; - auto Relocations = Thread.CPUBackend->TakeRelocations(SourceBinary.FileStartVA); +template +struct CodeCacheSerializer { + std::byte* Buffer {}; + size_t Offset {}; + + void WriteBuf(const void* Buf, size_t Size) { + if constexpr (!SizeCalc) { + memcpy(Buffer + Offset, Buf, Size); + } + Offset += Size; + } + template + void WriteObj(const T& Obj) { + WriteBuf(&Obj, sizeof(Obj)); + } + + void PadToPage() { + Offset = AlignUp(Offset, Utils::FEX_PAGE_SIZE); + } +}; + +using SerializedBlockList = fextl::vector>; + +template +static bool SerializeCodeCache(CodeCache& Cache, CodeCacheSerializer& Serializer, const CodeCacheHeader& header, + const SerializedBlockList& BlockList, std::span Relocations, + const GuestToHostMap& LookupCache, const CPU::CodeBuffer* CodeBuffer, size_t CodeBufferUsedSize, + const ExecutableFileSectionInfo& SourceBinary, uint64_t SerializedBaseAddress) { // Write file header - CodeCacheHeader header {}; - static_assert(GIT_HASH.size() == sizeof(header.FEXVersion)); - std::ranges::copy(GIT_HASH, header.FEXVersion); - header.NumBlocks = LookupCache.BlockList.size(); - header.NumCodePages = LookupCache.CodePages.size(); - header.CodeBufferSize = FEXCore::AlignUp(CTX.LatestOffset, Utils::FEX_PAGE_SIZE); - header.NumRelocations = Relocations.size(); - header.SerializedBaseAddress = SerializedBaseAddress; - ::write(fd, &header, sizeof(header)); + Serializer.WriteObj(header); // Dump guest<->host block mappings - { - // Cache contents must be deterministic, so copy 
the unordered block list and then sort by key - static_assert(!OrderedContainer, "Already deterministic; drop temporary container"); - fextl::vector> BlockList; - BlockList.reserve(LookupCache.BlockList.size()); - for (auto& [Guest, BlockEntry] : LookupCache.BlockList) { - static_assert(sizeof(Guest) == 8, "Breaking change in code cache data layout"); - BlockList.emplace_back(Guest, &BlockEntry); - } - std::ranges::sort(BlockList); - - for (auto [Guest, Host] : BlockList) { - static_assert(sizeof(Host->HostCode) == 8, "Breaking change in code cache data layout"); - static_assert(sizeof(Host->CodePages[0]) == 8, "Breaking change in code cache data layout"); - - Guest -= SourceBinary.FileStartVA; - ::write(fd, &Guest, sizeof(Guest)); - uint64_t HostCode = Host->HostCode - reinterpret_cast(CodeBuffer->Ptr); - ::write(fd, &HostCode, sizeof(HostCode)); - uint64_t NumCodePages = Host->CodePages.size(); - ::write(fd, &NumCodePages, sizeof(NumCodePages)); + for (auto [Guest, Host] : BlockList) { + static_assert(sizeof(Host->HostCode) == 8, "Breaking change in code cache data layout"); + static_assert(sizeof(Host->CodePages[0]) == 8, "Breaking change in code cache data layout"); + + uint64_t AdjustedGuest = Guest - SourceBinary.FileStartVA; + Serializer.WriteObj(AdjustedGuest); + uint64_t HostCode = Host->HostCode - reinterpret_cast(CodeBuffer->Ptr); + Serializer.WriteObj(HostCode); + uint64_t NumCodePages = Host->CodePages.size(); + Serializer.WriteObj(NumCodePages); + if constexpr (SizeCalc) { LOGMAN_THROW_A_FMT(std::ranges::is_sorted(Host->CodePages), "Code pages aren't sorted"); - for (auto CodePage : Host->CodePages) { - CodePage -= SourceBinary.FileStartVA; - ::write(fd, &CodePage, sizeof(CodePage)); - } + } + for (auto CodePage : Host->CodePages) { + uint64_t AdjustedPage = CodePage - SourceBinary.FileStartVA; + Serializer.WriteObj(AdjustedPage); } } // Dump relocations - static_assert(sizeof(Relocations[0]) == 48, "Breaking change in code cache data layout"); - 
::write(fd, Relocations.data(), Relocations.size() * sizeof(Relocations[0])); + static_assert(sizeof(CPU::Relocation) == 48, "Breaking change in code cache data layout"); + Serializer.WriteBuf(Relocations.data(), Relocations.size_bytes()); // Pad to next page in file so that the CodeBuffer can be mmap'ed into process on load - { - auto AlignedSize = AlignUp(lseek(fd, 0, SEEK_CUR), Utils::FEX_PAGE_SIZE); - ::ftruncate(fd, AlignedSize); - lseek(fd, AlignedSize, SEEK_SET); - } + Serializer.PadToPage(); // Dump the host code (relocated for position-independent serialization) - std::span CodeBufferData(reinterpret_cast(CodeBuffer->Ptr), reinterpret_cast(CodeBuffer->Ptr) + CTX.LatestOffset); - if (!ApplyCodeRelocations(SerializedBaseAddress, CodeBufferData, Relocations, 0, true)) { - LOGMAN_THROW_A_FMT(false, "Failed to apply code relocations"); - return false; - } - ::write(fd, CodeBufferData.data(), CodeBufferData.size()); - // Pad to next page in file for mmap - { - auto PaddedSize = AlignUp(lseek(fd, 0, SEEK_CUR), Utils::FEX_PAGE_SIZE); - ::ftruncate(fd, PaddedSize); - lseek(fd, PaddedSize, SEEK_SET); + auto CodeStartOffset = Serializer.Offset; + Serializer.WriteBuf(CodeBuffer->Ptr, CodeBufferUsedSize); + Serializer.Offset = CodeStartOffset + header.CodeBufferSize; + if constexpr (!SizeCalc) { + auto CodeBufferData = std::span {Serializer.Buffer + CodeStartOffset, CodeBufferUsedSize}; + if (!Cache.ApplyCodeRelocations(SerializedBaseAddress, CodeBufferData, Relocations, 0, true)) { + LOGMAN_THROW_A_FMT(false, "Failed to apply code relocations"); + return false; + } } // Dump code pages static_assert(OrderedContainer, "Non-deterministic data source"); for (const auto& [PageIndex, Entrypoints] : LookupCache.CodePages) { uint64_t PageAddr = (PageIndex << 12) - SourceBinary.FileStartVA; - ::write(fd, &PageAddr, sizeof(PageAddr)); + Serializer.WriteObj(PageAddr); uint64_t NumEntrypoints = Entrypoints.size(); - ::write(fd, &NumEntrypoints, sizeof(NumEntrypoints)); + 
Serializer.WriteObj(NumEntrypoints); for (uint64_t Entrypoint : Entrypoints) { - Entrypoint -= SourceBinary.FileStartVA; - ::write(fd, &Entrypoint, sizeof(Entrypoint)); + uint64_t AdjustedEntry = Entrypoint - SourceBinary.FileStartVA; + Serializer.WriteObj(AdjustedEntry); } } return true; } +bool CodeCache::SaveData(Core::InternalThreadState& Thread, const ExecutableFileSectionInfo& SourceBinary, uint64_t SerializedBaseAddress, + std::function MapFile) { + auto CodeBuffer = CTX.GetLatest(); + auto& LookupCache = *CodeBuffer->LookupCache; + auto Relocations = Thread.CPUBackend->TakeRelocations(SourceBinary.FileStartVA); + + // Cache contents must be deterministic, so copy the unordered block list and then sort by key + static_assert(!OrderedContainer, "Already deterministic; drop temporary container"); + SerializedBlockList BlockList; + BlockList.reserve(LookupCache.BlockList.size()); + for (auto& [Guest, BlockEntry] : LookupCache.BlockList) { + static_assert(sizeof(Guest) == 8, "Breaking change in code cache data layout"); + BlockList.emplace_back(Guest, &BlockEntry); + } + std::ranges::sort(BlockList); + + CodeCacheHeader header {}; + static_assert(GIT_HASH.size() == sizeof(header.FEXVersion)); + std::ranges::copy(GIT_HASH, header.FEXVersion); + header.NumBlocks = LookupCache.BlockList.size(); + header.NumCodePages = LookupCache.CodePages.size(); + header.CodeBufferSize = FEXCore::AlignUp(CTX.LatestOffset, Utils::FEX_PAGE_SIZE); + header.NumRelocations = Relocations.size(); + header.SerializedBaseAddress = SerializedBaseAddress; + + CodeCacheSerializer SizeSerializer {}; + if (!SerializeCodeCache(*this, SizeSerializer, header, BlockList, std::span {Relocations}, LookupCache, CodeBuffer.get(), + CTX.LatestOffset, SourceBinary, SerializedBaseAddress)) { + return false; + } + + std::byte* Buffer = static_cast(MapFile(SizeSerializer.Offset)); + if (!Buffer) { + return false; + } + + CodeCacheSerializer DataSerializer {Buffer}; + return SerializeCodeCache(*this, 
DataSerializer, header, BlockList, std::span {Relocations}, LookupCache, CodeBuffer.get(), + CTX.LatestOffset, SourceBinary, SerializedBaseAddress); +} + void CodeCache::Validate(const ExecutableFileSectionInfo& Section, fextl::set GuestBlocks, const fextl::set& HostBlocks, std::span CachedCode) { LOGMAN_THROW_A_FMT(!HostBlocks.empty(), "Tried to validate without any host blocks"); diff --git a/FEXCore/include/FEXCore/Core/CodeCache.h b/FEXCore/include/FEXCore/Core/CodeCache.h index bd99d4bd2e..f90a48ac24 100644 --- a/FEXCore/include/FEXCore/Core/CodeCache.h +++ b/FEXCore/include/FEXCore/Core/CodeCache.h @@ -1,7 +1,6 @@ // SPDX-License-Identifier: MIT #pragma once -#include #include #include #include @@ -12,6 +11,7 @@ #include #include +#include #include #include #include @@ -240,10 +240,11 @@ class AbstractCodeCache { virtual uint64_t ComputeCodeMapId(std::string_view Filename, int FD) = 0; /** - * Bundles the current Core state (CodeBuffer, GuestToHostMapping, ...) to a code cache and writes it to the given file descriptor. + * Bundles the current Core state (CodeBuffer, GuestToHostMapping, ...) to a code cache and writes it to the mapped cache file. * Returns true on success. 
*/ - virtual bool SaveData(Core::InternalThreadState&, int TargetFD, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress) = 0; + virtual bool SaveData(Core::InternalThreadState&, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress, + std::function MapFile) = 0; /** * Function to be called before compiling any code for caching purposes diff --git a/Source/Tools/FEXOfflineCompiler/Main.cpp b/Source/Tools/FEXOfflineCompiler/Main.cpp index 80688afb85..72d4680adb 100644 --- a/Source/Tools/FEXOfflineCompiler/Main.cpp +++ b/Source/Tools/FEXOfflineCompiler/Main.cpp @@ -19,8 +19,11 @@ #include #include +#include #include #include +#include +#include class AOTSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEX::HLE::SyscallMmapInterface { public: @@ -225,10 +228,31 @@ static std::optional GenerateSingleCache(FEXCore::ExecutableFileInf auto Filename = fmt::format("{}{}-{:016x}", OutDir, FEXCore::CodeMap::GetBaseFilename(Binary, false), CodeCacheConfigId); auto FilenameNew = Filename + ".new"; - int fd = open(FilenameNew.c_str(), O_CREAT | O_WRONLY, 0644); + int fd = open(FilenameNew.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); { + void* MappedPtr {}; + size_t MapSize {}; + auto Entry = SyscallHandler->LookupExecutableFileSection(Thread, SyscallHandler->VAFileStart).value(); - CTX->GetCodeCache().SaveData(*Thread, fd, Entry, 0 /* TODO: Use static base address information if available */); + CTX->GetCodeCache().SaveData(*Thread, Entry, 0 /* TODO: Use static base address information if available */, [&](size_t TotalSize) -> void* { + if (ftruncate(fd, TotalSize) != 0) { + return nullptr; + } + + MapSize = TotalSize; + MappedPtr = mmap(nullptr, TotalSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (MappedPtr == MAP_FAILED) { + MappedPtr = nullptr; + return nullptr; + } + + return MappedPtr; + }); + + if (MappedPtr) { + munmap(MappedPtr, MapSize); + } } std::filesystem::rename(FilenameNew.c_str(), Filename.c_str()); close(fd); From 
f8cc4d7e0b20661cbf09d5ecc200e198e2315c4e Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 22:01:44 +0100 Subject: [PATCH 10/12] CodeCache: Accept multiple threads in SaveData Merges and sorts relocations from all provided threads before serializing, enabling multi-threaded offline code cache generation. --- FEXCore/Source/Interface/Context/Context.h | 2 +- FEXCore/Source/Interface/Core/CodeCache.cpp | 14 +++++++++++--- FEXCore/include/FEXCore/Core/CodeCache.h | 2 +- Source/Tools/FEXOfflineCompiler/Main.cpp | 3 ++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index e8ac82c024..1604c2e004 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -80,7 +80,7 @@ class CodeCache : public AbstractCodeCache { FEX_CONFIG_OPT(EnableCodeCacheValidation, ENABLECODECACHEVALIDATION); uint64_t ComputeCodeMapId(std::string_view Filename, int FD) override; - bool SaveData(Core::InternalThreadState&, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress, + bool SaveData(std::span Threads, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress, std::function MapFile) override; fextl::unique_ptr LoadCache(std::span CacheFile, const ExecutableFileInfo&, uint64_t FileStartVA) override; diff --git a/FEXCore/Source/Interface/Core/CodeCache.cpp b/FEXCore/Source/Interface/Core/CodeCache.cpp index 2590c66d22..40faa82387 100644 --- a/FEXCore/Source/Interface/Core/CodeCache.cpp +++ b/FEXCore/Source/Interface/Core/CodeCache.cpp @@ -380,11 +380,19 @@ static bool SerializeCodeCache(CodeCache& Cache, CodeCacheSerializer& return true; } -bool CodeCache::SaveData(Core::InternalThreadState& Thread, const ExecutableFileSectionInfo& SourceBinary, uint64_t SerializedBaseAddress, - std::function MapFile) { +bool CodeCache::SaveData(std::span Threads, const ExecutableFileSectionInfo& SourceBinary, + uint64_t 
SerializedBaseAddress, std::function MapFile) { + LOGMAN_THROW_A_FMT(!Threads.empty(), "Tried to save code cache without compiler threads"); + + auto CodeBuffer = CTX.GetLatest(); auto& LookupCache = *CodeBuffer->LookupCache; - auto Relocations = Thread.CPUBackend->TakeRelocations(SourceBinary.FileStartVA); + fextl::vector Relocations; + for (auto* Thread : Threads) { + auto ThreadRelocations = Thread->CPUBackend->TakeRelocations(SourceBinary.FileStartVA); + Relocations.insert(Relocations.end(), ThreadRelocations.begin(), ThreadRelocations.end()); + } + std::ranges::sort(Relocations, {}, [](const auto& Relocation) { return Relocation.Header.Offset; }); // Cache contents must be deterministic, so copy the unordered block list and then sort by key static_assert(!OrderedContainer, "Already deterministic; drop temporary container"); diff --git a/FEXCore/include/FEXCore/Core/CodeCache.h b/FEXCore/include/FEXCore/Core/CodeCache.h index f90a48ac24..ff3dbfa6ae 100644 --- a/FEXCore/include/FEXCore/Core/CodeCache.h +++ b/FEXCore/include/FEXCore/Core/CodeCache.h @@ -243,7 +243,7 @@ class AbstractCodeCache { * Bundles the current Core state (CodeBuffer, GuestToHostMapping, ...) to a code cache and writes it to the mapped cache file. * Returns true on success. 
*/ - virtual bool SaveData(Core::InternalThreadState&, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress, + virtual bool SaveData(std::span Threads, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress, std::function MapFile) = 0; /** diff --git a/Source/Tools/FEXOfflineCompiler/Main.cpp b/Source/Tools/FEXOfflineCompiler/Main.cpp index 72d4680adb..b46d29d687 100644 --- a/Source/Tools/FEXOfflineCompiler/Main.cpp +++ b/Source/Tools/FEXOfflineCompiler/Main.cpp @@ -234,7 +234,8 @@ static std::optional GenerateSingleCache(FEXCore::ExecutableFileInf size_t MapSize {}; auto Entry = SyscallHandler->LookupExecutableFileSection(Thread, SyscallHandler->VAFileStart).value(); - CTX->GetCodeCache().SaveData(*Thread, Entry, 0 /* TODO: Use static base address information if available */, [&](size_t TotalSize) -> void* { + FEXCore::Core::InternalThreadState* Threads[] = {Thread}; + CTX->GetCodeCache().SaveData(Threads, Entry, 0 /* TODO: Use static base address information if available */, [&](size_t TotalSize) -> void* { if (ftruncate(fd, TotalSize) != 0) { return nullptr; } From 5a8c6582023679713a4393a37216f431647613d2 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 23:45:52 +0100 Subject: [PATCH 11/12] Reapply "CodeCache: Use defaulted dtor for ExecutableFileInfo" This reverts commit 44a6bfd8740f68c93df8121e03ccbbedaab8e42a. 
--- FEXCore/Source/Interface/Core/CodeCache.cpp | 2 -- FEXCore/include/FEXCore/Core/CodeCache.h | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/FEXCore/Source/Interface/Core/CodeCache.cpp b/FEXCore/Source/Interface/Core/CodeCache.cpp index 40faa82387..9b8c6c227e 100644 --- a/FEXCore/Source/Interface/Core/CodeCache.cpp +++ b/FEXCore/Source/Interface/Core/CodeCache.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -39,7 +38,6 @@ ExecutableFileInfo::ExecutableFileInfo(fextl::unique_ptr Map , FileId(FileId) , Filename(Filename) {} #endif -ExecutableFileInfo::~ExecutableFileInfo() = default; MappedCodeCacheFile::~MappedCodeCacheFile() { if (CacheManager) { diff --git a/FEXCore/include/FEXCore/Core/CodeCache.h b/FEXCore/include/FEXCore/Core/CodeCache.h index ff3dbfa6ae..e011d92eba 100644 --- a/FEXCore/include/FEXCore/Core/CodeCache.h +++ b/FEXCore/include/FEXCore/Core/CodeCache.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -37,8 +38,6 @@ enum class GuestRelocationType : uint32_t { // Generic information associated with an executable file. struct ExecutableFileInfo { - ~ExecutableFileInfo(); - #if __clang_major__ < 16 // Workaround for broken aggregate-initialization with std::piecewise_construct ExecutableFileInfo(fextl::unique_ptr, uint64_t, fextl::string); From 4e78fc75319b0f9c6ccec3cf8c999362010fb360 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 17 May 2026 22:11:28 +0100 Subject: [PATCH 12/12] Windows: Implement WOW64/ARM64EC offline compiler backend Implements an offline JIT compiler backend that compiles x86 code blocks from a given code map into an ARM64 code cache on Windows. Separate binaries are built for WOW64 (32-bit) and ARM64EC (64-bit) targets. 
--- Source/Windows/CMakeLists.txt | 2 + .../OfflineCompilerBackend/CMakeLists.txt | 32 ++ .../Windows/OfflineCompilerBackend/Main.cpp | 507 ++++++++++++++++++ 3 files changed, 541 insertions(+) create mode 100644 Source/Windows/OfflineCompilerBackend/CMakeLists.txt create mode 100644 Source/Windows/OfflineCompilerBackend/Main.cpp diff --git a/Source/Windows/CMakeLists.txt b/Source/Windows/CMakeLists.txt index ad99ea1647..8f2a8fcc12 100644 --- a/Source/Windows/CMakeLists.txt +++ b/Source/Windows/CMakeLists.txt @@ -29,3 +29,5 @@ if (ARCHITECTURE_arm64ec) elseif (ARCHITECTURE_arm64) add_subdirectory(WOW64) endif() + +add_subdirectory(OfflineCompilerBackend) diff --git a/Source/Windows/OfflineCompilerBackend/CMakeLists.txt b/Source/Windows/OfflineCompilerBackend/CMakeLists.txt new file mode 100644 index 0000000000..10fe566a83 --- /dev/null +++ b/Source/Windows/OfflineCompilerBackend/CMakeLists.txt @@ -0,0 +1,32 @@ +if (ARCHITECTURE_arm64ec) + set(TARGET_NAME FEXOfflineCompilerBackendARM64EC) +elseif (ARCHITECTURE_arm64) + set(TARGET_NAME FEXOfflineCompilerBackendWOW64) +endif() +add_executable(${TARGET_NAME} Main.cpp $) +patch_library_wine(${TARGET_NAME}) + +target_include_directories(${TARGET_NAME} PRIVATE + "${CMAKE_SOURCE_DIR}/Source/Windows/include/" + "${CMAKE_SOURCE_DIR}/Source/" + "${CMAKE_SOURCE_DIR}/Source/Windows/" +) + +target_link_libraries(${TARGET_NAME} + PRIVATE + FEXCore_Base + Common + CommonTools + JemallocLibs + CommonWindows + ntdll_ex +) + +target_link_options(${TARGET_NAME} PRIVATE -static) +target_link_options(${TARGET_NAME} PRIVATE "LINKER:--image-base,0x600000000") + +install(TARGETS ${TARGET_NAME} + RUNTIME + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT runtime) + diff --git a/Source/Windows/OfflineCompilerBackend/Main.cpp b/Source/Windows/OfflineCompilerBackend/Main.cpp new file mode 100644 index 0000000000..b7e01757df --- /dev/null +++ b/Source/Windows/OfflineCompilerBackend/Main.cpp @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: MIT + 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Common/ArgumentLoader.h" +#include "Common/Config.h" +#include "Common/JITGuardPage.h" +#include "Common/CPUFeatures.h" +#include "Common/Handle.h" +#include "Common/ImageTracker.h" +#include "Common/InvalidationTracker.h" +#include "Common/Logging.h" +#include "Common/Module.h" +#include "Common/OvercommitTracker.h" +#include "Common/PortabilityInfo.h" + +#include "DummyHandlers.h" + + +namespace { +std::optional InvalidationTracker; +std::optional ImageTracker; +std::optional OvercommitTracker; +thread_local FEXCore::Core::InternalThreadState* ThisThread {}; + +struct ImageInfo { + FEXCore::ExecutableFileInfo Info; + FEXCore::CodeMap::ParsedContents Contents; + bool RecompileCode {}; + + bool CheckNeedsRecompile(const std::filesystem::path& CodeMapPath, const std::filesystem::path& CachePath) { + std::error_code ec; + const auto CodeMapTime = std::filesystem::last_write_time(CodeMapPath, ec); + if (ec) { + return true; + } + + const auto CacheTime = std::filesystem::last_write_time(CachePath, ec); + if (ec) { + return true; + } + + return CodeMapTime > CacheTime; + } +}; + +#ifdef _M_ARM64EC +static constexpr size_t DefaultCS {FEXCore::Core::CPUState::DEFAULT_USER_CS}; +#else +static constexpr size_t DefaultCS {4}; +#endif + +static FEXCore::Core::CPUState::gdt_segment GDTSegments[32] {}; + +static void InitializeGDT() { + auto& GDT = GDTSegments[DefaultCS]; + FEXCore::Core::CPUState::SetGDTBase(&GDT, 0); + FEXCore::Core::CPUState::SetGDTLimit(&GDT, 0xF'FFFFU); +#ifdef _M_ARM64EC + GDT.L = 1; + GDT.D = 0; +#else + GDT.L = 0; + GDT.D = 1; +#endif +} + +void InitializeThreadContext(FEXCore::Core::InternalThreadState* ThreadState) { + auto Frame = ThreadState->CurrentFrame; + 
Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &GDTSegments[0]; + Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &GDTSegments[0]; + Frame->State.cs_idx = DefaultCS << 3; + Frame->State.cs_cached = FEXCore::Core::CPUState::CalculateGDTBase(GDTSegments[DefaultCS]); +} + +bool RelocateMappedImage(HMODULE Module) { + const auto* NtHeaders = reinterpret_cast(RtlImageNtHeader(Module)); + if (!NtHeaders) { + return false; + } + + const auto BaseAddress = reinterpret_cast(Module); + const auto PreferredBase = NtHeaders->OptionalHeader.ImageBase; + const auto Delta = static_cast(BaseAddress - PreferredBase); + + // Wine will automatically relocate all DLLs to their mapped address, but PE relocations must still be applied so + // FEXCore can correctly transform them into FEX relocations + if (Delta == 0) { + return true; + } + + ULONG RelocSize = 0; + auto* RelocBlock = + reinterpret_cast(RtlImageDirectoryEntryToData(Module, true, IMAGE_DIRECTORY_ENTRY_BASERELOC, &RelocSize)); + + if (!RelocBlock || RelocSize == 0) { + return true; + } + + // Reprotect all sections as RW to apply relocations, saving their prior protections + struct SectionPatchState { + void* Address; + SIZE_T Size; + DWORD PreviousProtection; + }; + std::vector SectionStates; + SectionStates.reserve(NtHeaders->FileHeader.NumberOfSections); + + auto* SectionHeader = IMAGE_FIRST_SECTION(NtHeaders); + const auto* SectionHeaderEnd = SectionHeader + NtHeaders->FileHeader.NumberOfSections; + for (; SectionHeader != SectionHeaderEnd; ++SectionHeader) { + if (SectionHeader->SizeOfRawData == 0) { + continue; + } + + const auto SecAddr = reinterpret_cast(BaseAddress + SectionHeader->VirtualAddress); + const SIZE_T SecSize = SectionHeader->Misc.VirtualSize; + + DWORD OldProt = 0; + if (!VirtualProtect(SecAddr, SecSize, PAGE_READWRITE, &OldProt)) { + for (const auto& State : SectionStates) { + DWORD Ignored; + VirtualProtect(State.Address, State.Size, 
State.PreviousProtection, &Ignored); + } + return false; + } + + SectionStates.push_back({SecAddr, SecSize, OldProt}); + } + + // Apply relocations to all sections + bool RelocSuccess = true; + const uintptr_t RelocEnd = reinterpret_cast(RelocBlock) + RelocSize; + const uint32_t ImageSize = NtHeaders->OptionalHeader.SizeOfImage; + + while (reinterpret_cast(RelocBlock) < RelocEnd && RelocBlock->SizeOfBlock) { + if (RelocBlock->VirtualAddress >= ImageSize) { + RelocSuccess = false; + break; + } + + const auto Count = (RelocBlock->SizeOfBlock - sizeof(IMAGE_BASE_RELOCATION)) / sizeof(USHORT); + const auto PageAddress = BaseAddress + RelocBlock->VirtualAddress; + + RelocBlock = LdrProcessRelocationBlock(PageAddress, Count, reinterpret_cast(RelocBlock + 1), Delta); + + if (!RelocBlock) { + RelocSuccess = false; + break; + } + } + + // Restore sections to previous protection states + for (const auto& State : SectionStates) { + DWORD Ignored; + VirtualProtect(State.Address, State.Size, State.PreviousProtection, &Ignored); + } + + if (!RelocSuccess) { + return false; + } + + LogMan::Msg::IFmt("Relocated image {:X} -> {:X}", PreferredBase, BaseAddress); + return true; +} + +#ifdef ARCHITECTURE_arm64ec +void* MapView(HANDLE SectionHandle) { + return MapViewOfFile(SectionHandle, FILE_MAP_EXECUTE | FILE_MAP_READ, 0, 0, 0); +} +#else +void* MapView(HANDLE SectionHandle) { + void* BaseAddress = nullptr; + SIZE_T ViewSize = 0; + LARGE_INTEGER Offset {}; + + // Map images in the lower 32-bits for WOW64 so relocations can be correctly applied + const ULONG_PTR ZeroBits = 0x7fffffff; + + NTSTATUS Status = + NtMapViewOfSection(SectionHandle, GetCurrentProcess(), &BaseAddress, ZeroBits, 0, &Offset, &ViewSize, ViewShare, 0, PAGE_EXECUTE_READ); + + if (Status < 0) { + return nullptr; + } + + return BaseAddress; +} +#endif + +struct MappedImage { + uint64_t BaseAddress; + FEXCore::ExecutableFileSectionInfo SectionInfo; + ImageInfo Info; +}; + +std::vector 
TryMapImages(std::unordered_map&& Images) { + std::vector Result; + + for (auto& [ID, Info] : Images) { + if (!Info.RecompileCode || Info.Contents.Blocks.empty()) { + continue; + } + + FEX::Windows::ScopedHandle File {CreateFileA(Info.Contents.Filename.c_str(), GENERIC_READ | SYNCHRONIZE, + FILE_SHARE_READ | FILE_SHARE_DELETE, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)}; + if (!File) { + LogMan::Msg::EFmt("Couldn't find image: {}", Info.Contents.Filename); + continue; + } + + FEX::Windows::ScopedHandle Section {CreateFileMappingA(*File, nullptr, SEC_IMAGE | PAGE_EXECUTE_READ, 0, 0, nullptr)}; + if (!Section) { + LogMan::Msg::EFmt("Couldn't create section for image: {}", Info.Contents.Filename); + continue; + } + + void* Mapping = MapView(*Section); + if (!Mapping) { + LogMan::Msg::EFmt("Couldn't map section for image: {}", Info.Contents.Filename); + continue; + } + + if (!RelocateMappedImage(reinterpret_cast(Mapping))) { + LogMan::Msg::EFmt("Failed to apply image relocations"); + UnmapViewOfFile(Mapping); + continue; + } + + uint64_t BaseAddress = reinterpret_cast(Mapping); + LogMan::Msg::IFmt("Mapped image: {} @ {:X}", Info.Contents.Filename, BaseAddress); + + InvalidationTracker->HandleImageMap(FEX::Windows::BaseName(Info.Contents.Filename), BaseAddress); + auto SectionInfo = ImageTracker->HandleImageMap(Info.Contents.Filename, BaseAddress, Info.Contents.IsExecutable); + + Result.push_back(MappedImage {.BaseAddress = BaseAddress, .SectionInfo = SectionInfo, .Info = std::move(Info)}); + } + + return Result; +} + +} // namespace + +class AOTSyscallHandler : public FEXCore::HLE::SyscallHandler { +public: + AOTSyscallHandler() { + OSABI = FEXCore::HLE::SyscallOSABI::OS_GENERIC; + } + + uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override { + return 0; + } + + std::optional LookupExecutableFileSection(FEXCore::Core::InternalThreadState*, uint64_t Address) override { + return 
ImageTracker->LookupExecutableFileSection(Address); + } + + void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {} + + void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {} + + void MarkOvercommitRange(uint64_t Start, uint64_t Length) override { + OvercommitTracker->MarkRange(Start, Length); + } + + void UnmarkOvercommitRange(uint64_t Start, uint64_t Length) override { + OvercommitTracker->UnmarkRange(Start, Length); + } + + FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) override { + return InvalidationTracker->QueryExecutableRange(Address); + } + + void PreCompile() override {} +}; + +LONG ExceptionHandler(_EXCEPTION_POINTERS* ExceptionInfo) { + if (ExceptionInfo->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) { + const auto FaultAddress = static_cast(ExceptionInfo->ExceptionRecord->ExceptionInformation[1]); + if (OvercommitTracker->HandleAccessViolation(FaultAddress)) { + return EXCEPTION_CONTINUE_EXECUTION; + } + +#ifdef ARCHITECTURE_arm64ec + ARM64_NT_CONTEXT ArmContext {}; + auto* Context = &ArmContext; +#else + auto* Context = ExceptionInfo->ContextRecord; +#endif + if (FEX::Windows::JITGuardPage::HandleJITGuardPage(ThisThread, reinterpret_cast(FaultAddress), Context->X, + reinterpret_cast<__uint128_t*>(Context->V), &Context->Pc)) { +#ifdef ARCHITECTURE_arm64ec + auto* ECContext = reinterpret_cast(ExceptionInfo->ContextRecord); + ECContext->X0 = Context->X0; + ECContext->X19 = Context->X19; + ECContext->X20 = Context->X20; + ECContext->X21 = Context->X21; + ECContext->X22 = Context->X22; + ECContext->X25 = Context->X25; + ECContext->X26 = Context->X26; + ECContext->X27 = Context->X27; + ECContext->Fp = Context->Fp; + ECContext->Lr = Context->Lr; + ECContext->Sp = Context->Sp; + ECContext->Pc = Context->Pc; + + for (size_t i = 0; i < 8; ++i) { + 
memcpy(&reinterpret_cast<__uint128_t*>(ECContext->V)[8 + i], &reinterpret_cast<__uint128_t*>(Context->V)[8 + i], sizeof(uint64_t)); + } +#endif + return EXCEPTION_CONTINUE_EXECUTION; + } + } + return EXCEPTION_CONTINUE_SEARCH; +} + +int main(int argc, char** argv) { + if (argc < 4) { + fmt::print("Usage: {} \n", argv[0]); + return 1; + } + + fextl::string MainImageName {argv[1]}; + std::filesystem::path ImageCacheDir {argv[2]}; + std::ifstream CodeMap(argv[3], std::ios_base::binary); + if (!CodeMap) { + fmt::print("Could not open {}\n", argv[3]); + return 1; + } + + FEX::Config::LoadConfig(MainImageName, _environ, FEX::ReadPortabilityInformation()); + FEXCore::Config::ReloadMetaLayer(); + + FEX::Windows::Logging::Init(); +#ifdef _M_ARM64EC + FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, "1"); +#else + FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, "0"); +#endif + + const auto CacheDir = std::filesystem::path(FEX::Config::GetCacheDirectory()); + const auto ReadyDir = CacheDir / "codemap" / "ready"; + const auto MetadataPath = ImageCacheDir / "metadata"; + + const auto NtDll = GetModuleHandle("ntdll.dll"); + const bool IsWine = !!GetProcAddress(NtDll, "wine_get_version"); + + auto HostFeatures = FEX::Windows::CPUFeatures::FetchHostFeatures(IsWine); + + std::unordered_map Images; + std::unordered_set ToPreserve; + + auto Parsed = FEXCore::CodeMap::ParseCodeMap(CodeMap); + + for (auto& [ID, Contents] : Parsed) { + FEXCore::ExecutableFileInfo FEXInfo {.FileId = ID, .Filename = Contents.Filename}; + + const auto BaseFilename = FEXCore::CodeMap::GetBaseFilename(FEXInfo, false); + const auto LibCodeMapPath = ReadyDir / BaseFilename; + const auto LibCachePath = ImageCacheDir / BaseFilename; + + // Handle dependencies by loading their specific codemap + if (!Contents.IsExecutable && Contents.Blocks.empty()) { + std::ifstream DepCodeMap(LibCodeMapPath, std::ios_base::binary); + if (!DepCodeMap) { + fmt::print("Could not open dependency codemap: {}\n", 
LibCodeMapPath.string()); + } else { + auto DepParsed = FEXCore::CodeMap::ParseCodeMap(DepCodeMap); + if (auto DepIt = DepParsed.find(ID); DepIt != DepParsed.end()) { + Contents = std::move(DepIt->second); + } + } + } + + LogMan::Msg::IFmt("Parsed {} codemap entries for {} ({})", Contents.Blocks.size(), Contents.Filename, ID); + + auto [It, _] = Images.emplace(ID, ImageInfo {.Info = std::move(FEXInfo), .Contents = std::move(Contents), .RecompileCode = false}); + + auto& CurrentImage = It->second; + CurrentImage.RecompileCode = CurrentImage.CheckNeedsRecompile(LibCodeMapPath, LibCachePath); + ToPreserve.emplace(BaseFilename); + } + + OvercommitTracker.emplace(IsWine); + + fextl::unique_ptr SignalDelegator = fextl::make_unique(); + fextl::unique_ptr SyscallHandler = fextl::make_unique(); + fextl::unique_ptr CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures); + CTX->SetSignalDelegator(SignalDelegator.get()); + CTX->SetSyscallHandler(SyscallHandler.get()); + CTX->InitCore(); + CTX->GetCodeCache().InitiateCacheGeneration(); + + AddVectoredExceptionHandler(1, ExceptionHandler); + + InitializeGDT(); + + std::unordered_map Threads; + InvalidationTracker.emplace(*CTX, Threads); + ImageTracker.emplace(*CTX, true); + + ThisThread = CTX->CreateThread(0, 0); + InitializeThreadContext(ThisThread); + + auto MappedImages = TryMapImages(std::move(Images)); + + if (!std::filesystem::exists(ImageCacheDir)) { + if (!std::filesystem::create_directories(ImageCacheDir)) { + LogMan::Msg::EFmt("Error creating directory {}", ImageCacheDir.string()); + return 1; + } + } + + for (auto& Image : MappedImages) { + LogMan::Msg::IFmt("Compiling module {}: {} entrypoints", Image.Info.Contents.Filename, Image.Info.Contents.Blocks.size()); + CTX->ClearCodeCache(ThisThread, true); + + for (const auto Block : Image.Info.Contents.Blocks) { + CTX->CompileRIP(ThisThread, Image.BaseAddress + Block); + } + + auto Filename = ImageCacheDir / 
FEXCore::CodeMap::GetBaseFilename(Image.SectionInfo.FileInfo, false); + auto StagingFilename = Filename; + StagingFilename += ".new"; + + FEX::Windows::ScopedHandle FileHandle { + CreateFileW(StagingFilename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)}; + if (FileHandle) { + FEX::Windows::ScopedHandle MapHandle {}; + void* MappedPtr {}; + + FEXCore::Core::InternalThreadState* CompilerThreads[] = {ThisThread}; + bool Success = CTX->GetCodeCache().SaveData(CompilerThreads, Image.SectionInfo, Image.BaseAddress, [&](size_t TotalSize) -> void* { + LARGE_INTEGER LiSize; + LiSize.QuadPart = TotalSize; + if (!SetFilePointerEx(*FileHandle, LiSize, nullptr, FILE_BEGIN)) { + return nullptr; + } + if (!SetEndOfFile(*FileHandle)) { + return nullptr; + } + MapHandle.reset(CreateFileMappingW(*FileHandle, nullptr, PAGE_READWRITE, 0, 0, nullptr)); + if (!MapHandle) { + return nullptr; + } + MappedPtr = MapViewOfFile(*MapHandle, FILE_MAP_WRITE, 0, 0, 0); + return MappedPtr; + }); + + if (MappedPtr) { + UnmapViewOfFile(MappedPtr); + } + + MapHandle.reset(); + FileHandle.reset(); + if (Success) { + if (!MoveFileExW(StagingFilename.c_str(), Filename.c_str(), MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { + LogMan::Msg::EFmt("Failed to replace cache file {}: {}", Filename.string(), GetLastError()); + std::filesystem::remove(StagingFilename); + } + } else { + std::filesystem::remove(StagingFilename); + LogMan::Msg::EFmt("Failed to save code cache data"); + } + } else { + LogMan::Msg::EFmt("Failed to open output file: {}", StagingFilename.string()); + } + } + + for (const auto& Entry : std::filesystem::directory_iterator(ImageCacheDir)) { + if (Entry.is_regular_file()) { + const auto FileName = Entry.path().filename().string(); + if (ToPreserve.find(FileName) == ToPreserve.end()) { + std::error_code ec; + if (std::filesystem::remove(Entry.path(), ec)) { + LogMan::Msg::IFmt("Deleted stale file: {}", FileName); + } + } + } + } + 
+ LogMan::Msg::IFmt("Done Compiling"); + return 0; +}