[add] first

2023-10-08 10:24:48 +08:00
commit b1ae0510a9
1048 changed files with 3254361 additions and 0 deletions


@@ -0,0 +1,152 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#include "../Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_CappedSemaphore_SemaphoreBased.inl.h"
#endif
// Space out to different cache lines.
// The idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and should only touch wakeups.
// The only exception to that rule is when we hit a timeout.
typedef struct Baselib_CappedSemaphore
{
int32_t wakeups;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
int32_t count;
const int32_t cap;
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) * 2]; // Having cap on the same cacheline is fine since it is a constant.
} Baselib_CappedSemaphore;
BASELIB_STATIC_ASSERT(sizeof(Baselib_CappedSemaphore) == PLATFORM_CACHE_LINE_SIZE * 2, "Baselib_CappedSemaphore (Futex) size should match 2*cacheline size (128bytes)");
BASELIB_STATIC_ASSERT(offsetof(Baselib_CappedSemaphore, wakeups) ==
(offsetof(Baselib_CappedSemaphore, count) - PLATFORM_CACHE_LINE_SIZE), "Baselib_CappedSemaphore (futex) wakeups and count shouldn't share cacheline");
BASELIB_INLINE_API Baselib_CappedSemaphore Baselib_CappedSemaphore_Create(const uint16_t cap)
{
Baselib_CappedSemaphore semaphore = { 0, {0}, 0, cap, {0} };
return semaphore;
}
BASELIB_INLINE_API bool Detail_Baselib_CappedSemaphore_ConsumeWakeup(Baselib_CappedSemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryAcquire(Baselib_CappedSemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Acquire(Baselib_CappedSemaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
while (!Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
}
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryTimedAcquire(Baselib_CappedSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
if (Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore));
return true;
}
BASELIB_INLINE_API uint16_t Baselib_CappedSemaphore_Release(Baselib_CappedSemaphore* semaphore, const uint16_t _count)
{
int32_t count = _count;
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
do
{
if (previousCount == semaphore->cap)
return 0;
if (previousCount + count > semaphore->cap)
count = semaphore->cap - previousCount;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->count, &previousCount, previousCount + count));
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
}
return count;
}
BASELIB_INLINE_API uint32_t Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(Baselib_CappedSemaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Free(Baselib_CappedSemaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
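// --- Illustrative usage sketch (not from the original sources) ---
// A minimal example of how the capped semaphore above could gate access to a fixed-size
// resource pool. All MyPool_* names are hypothetical and only serve to show the API flow.
typedef struct MyPool
{
    Baselib_CappedSemaphore available; // capped at the pool size
} MyPool;

static inline MyPool MyPool_Create(uint16_t poolSize)
{
    MyPool pool = { Baselib_CappedSemaphore_Create(poolSize) };
    // Publish all tokens; Release never raises count above the cap.
    Baselib_CappedSemaphore_Release(&pool.available, poolSize);
    return pool;
}

static inline void MyPool_Borrow(MyPool* pool)
{
    Baselib_CappedSemaphore_Acquire(&pool->available); // blocks until a token is available
}

static inline void MyPool_Return(MyPool* pool)
{
    Baselib_CappedSemaphore_Release(&pool->available, 1); // releases beyond the cap are dropped
}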


@@ -0,0 +1,122 @@
#pragma once
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_Thread.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_CappedSemaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef struct Baselib_CappedSemaphore
{
Baselib_SystemSemaphore_Handle handle;
int32_t count;
const int32_t cap;
// Make the capped semaphore take a full cache line so that, if the user cache-line aligns the semaphore,
// LL/SC operations on count will not spuriously fail.
char _cachelineSpacer[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) * 2 - sizeof(Baselib_SystemSemaphore_Handle)];
char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_CappedSemaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_CappedSemaphore, count) + PLATFORM_CACHE_LINE_SIZE - sizeof(Baselib_SystemSemaphore_Handle)) ==
offsetof(Baselib_CappedSemaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
BASELIB_INLINE_API Baselib_CappedSemaphore Baselib_CappedSemaphore_Create(uint16_t cap)
{
Baselib_CappedSemaphore semaphore = {{0}, 0, cap, {0}, {0}};
semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
return semaphore;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Acquire(Baselib_CappedSemaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryAcquire(Baselib_CappedSemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryTimedAcquire(Baselib_CappedSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
return true;
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
return true;
}
BASELIB_INLINE_API uint16_t Baselib_CappedSemaphore_Release(Baselib_CappedSemaphore* semaphore, const uint16_t _count)
{
int32_t count = _count;
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
do
{
if (previousCount == semaphore->cap)
return 0;
if (previousCount + count > semaphore->cap)
count = semaphore->cap - previousCount;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->count, &previousCount, previousCount + count));
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
}
return count;
}
BASELIB_INLINE_API uint32_t Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(Baselib_CappedSemaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Free(Baselib_CappedSemaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}


@@ -0,0 +1,7 @@
#pragma once
#include "../Baselib_StaticAssert.h"
#define BASELIB_ENUM_ENSURE_ABI_COMPATIBILITY(_enumType) \
BASELIB_STATIC_ASSERT(sizeof(_enumType) == 4, \
"Baselib assumes that sizeof any enum type is exactly 4 bytes, there might be ABI compatibility problems if violated");


@@ -0,0 +1,198 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_EventSemaphore_SemaphoreBased.inl.h"
#endif
typedef struct Baselib_EventSemaphore
{
int32_t state;
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_EventSemaphore;
BASELIB_STATIC_ASSERT(sizeof(Baselib_EventSemaphore) == PLATFORM_CACHE_LINE_SIZE, "Baselib_EventSemaphore size should match cacheline size (64bytes)");
// The futex based event semaphore is in one of *three* states:
// * ResetNoWaitingThreads: EventSemaphore blocks threads, but there aren't any blocked yet
// * Reset: EventSemaphore blocks threads and there are some already
// * Set: EventSemaphore is not blocking any acquiring threads
//
// The ResetNoWaitingThreads state is an optimization that allows us to avoid the (comparatively) costly futex notification syscalls.
//
// In addition, there is a generation counter baked into the state variable in order to prevent lock stealing.
// -> Any change in the state during acquire (other than going from ResetNoWaitingThreads to Reset) means that the thread can continue
// (since in this case either it was set on the current generation or the generation was changed which implies an earlier release operation)
//
// Allowed state transitions:
// ResetNoWaitingThreads-Gen(X) -> Reset-Gen(X) == Acquire/TryTimedAcquire if no thread was waiting already
// ResetNoWaitingThreads-Gen(X) -> Set-Gen(X) == Set but no thread was waiting
// Reset-Gen(X) -> Set-Gen(X+1) == Set if threads were waiting
// Set-Gen(X) -> ResetNoWaitingThreads-Gen(X) == Reset/ResetAndReleaseWaitingThreads
// Reset-Gen(X) -> ResetNoWaitingThreads-Gen(X+1) == ResetAndReleaseWaitingThreads if threads were waiting
//
// Note how any state transition from Reset requires increasing the generation counter.
enum
{
//Detail_Baselib_EventSemaphore_ResetNoWaitingThreads = 0,
Detail_Baselib_EventSemaphore_Set = (uint32_t)1 << 30,
Detail_Baselib_EventSemaphore_Reset = (uint32_t)2 << 30,
Detail_Baselib_EventSemaphore_GenMask = ~((uint32_t)(1 | 2) << 30)
};
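// --- Illustrative note (not from the original sources) ---
// The 32-bit state packs a 30-bit generation counter with the two flag bits above.
// With a hypothetical generation value of 5:
//   ResetNoWaitingThreads-Gen(5) == 5
//   Reset-Gen(5)                 == Detail_Baselib_EventSemaphore_Reset | 5
//   Set-Gen(5)                   == Detail_Baselib_EventSemaphore_Set   | 5
// A small (made-up) helper mirroring Detail_Baselib_EventSemaphore_Generation, but for the flag bits:
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_ExampleFlagBits(int32_t state)
{
    return (uint32_t)state & ~(uint32_t)Detail_Baselib_EventSemaphore_GenMask; // 0, Set or Reset
}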
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_Generation(int32_t state)
{
return state & Detail_Baselib_EventSemaphore_GenMask;
}
// If Detail_Baselib_EventSemaphore_ResetNoWaitingThreads is set, sets Detail_Baselib_EventSemaphore_Reset flag.
// Returns last known state of the semaphore.
// Does nothing if state changed while this function runs (that includes generation changes while attempting to set the ResetState!)
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(Baselib_EventSemaphore* semaphore)
{
int32_t state = Baselib_atomic_load_32_acquire(&semaphore->state);
const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
while (state == resetNoWaitingThreadsState)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->state, &state, resetState))
return resetState;
}
return state;
}
BASELIB_INLINE_API Baselib_EventSemaphore Baselib_EventSemaphore_Create(void)
{
const Baselib_EventSemaphore semaphore = { 0, {0} };
return semaphore;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryAcquire(Baselib_EventSemaphore* semaphore)
{
const int32_t state = Baselib_atomic_load_32_acquire(&semaphore->state);
return state & Detail_Baselib_EventSemaphore_Set ? true : false;
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Acquire(Baselib_EventSemaphore* semaphore)
{
const int32_t state = Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(semaphore);
if (state & Detail_Baselib_EventSemaphore_Set)
return;
do
{
// State is now in Detail_Baselib_EventSemaphore_Reset-Gen(X).
Baselib_SystemFutex_Wait(&semaphore->state, state, UINT32_MAX);
// If the state has changed in any way, it is now in either of
// Set-Gen(X), Set-Gen(X+n), ResetNoWaitingThreads-Gen(X+n) or Reset-Gen(X+n) (with n>0).
if (state != Baselib_atomic_load_32_relaxed(&semaphore->state))
return;
}
while (true);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryTimedAcquire(Baselib_EventSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t state = Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(semaphore);
if (state & Detail_Baselib_EventSemaphore_Set)
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
// State is now in Detail_Baselib_EventSemaphore_Reset-Gen(X).
Baselib_SystemFutex_Wait(&semaphore->state, state, timeLeft);
// If the state has changed in any way, it is now in either of
// Set-Gen(X), Set-Gen(X+n), ResetNoWaitingThreads-Gen(X+n) or Reset-Gen(X+n) (with n>0).
if (state != Baselib_atomic_load_32_relaxed(&semaphore->state))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// The EventSemaphore now looks like there are still threads waiting even though there *might* be none!
// This is not an issue, however, since it merely means that Set/ResetAndReleaseWaitingThreads will do a potentially redundant futex notification.
return false;
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Reset(Baselib_EventSemaphore* semaphore)
{
int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
while (state == setState)
{
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, resetNoWaitingThreadsState))
return;
}
Baselib_atomic_thread_fence_release();
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Set(Baselib_EventSemaphore* semaphore)
{
int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
// If there is no thread waiting on the semaphore, there is no need to wake & increase the generation count.
// Just set it to Set if it isn't already.
while (state == resetNoWaitingThreadsState)
{
const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, setState))
return;
}
// If this is not the case however, we do exactly that, increase the generation & wake all threads.
while (state == resetState)
{
const int32_t nextGenSetState = Detail_Baselib_EventSemaphore_Generation(state + 1) | Detail_Baselib_EventSemaphore_Set;
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, nextGenSetState))
{
Baselib_SystemFutex_Notify(&semaphore->state, UINT32_MAX, Baselib_WakeupFallbackStrategy_All);
return;
}
}
// EventSemaphore was already in set state.
Baselib_atomic_thread_fence_release();
}
BASELIB_INLINE_API void Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(Baselib_EventSemaphore* semaphore)
{
// Note that doing a Baselib_EventSemaphore_Set & Baselib_EventSemaphore_Reset has the same observable effects, just slightly slower.
int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
// If there is no thread waiting on the semaphore, there is no need to wake & increase the generation count.
// Just set it to ResetNoWaitingThreads if it isn't already.
while (state == setState)
{
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, resetNoWaitingThreadsState))
return;
}
// If this is not the case however, we do exactly that, increase the generation & wake all threads.
while (state == resetState)
{
const int32_t nextGenPendingResetState = Detail_Baselib_EventSemaphore_Generation(state + 1);
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->state, &state, nextGenPendingResetState))
{
Baselib_SystemFutex_Notify(&semaphore->state, UINT32_MAX, Baselib_WakeupFallbackStrategy_All);
return;
}
}
// EventSemaphore was already in the ResetNoWaitingThreads state.
Baselib_atomic_thread_fence_release();
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Free(Baselib_EventSemaphore* semaphore)
{
}
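// --- Illustrative usage sketch (not from the original sources) ---
// A hypothetical "wait for initialization" gate built on the event semaphore above; the
// MyInitGate_* names are made up. Worker threads block in WaitForInit until another thread
// calls SignalInitDone, after which all current and future waiters pass straight through.
typedef struct MyInitGate
{
    Baselib_EventSemaphore ready;
} MyInitGate;

static inline MyInitGate MyInitGate_Create(void)
{
    MyInitGate gate = { Baselib_EventSemaphore_Create() }; // starts in the blocking (reset) state
    return gate;
}

static inline void MyInitGate_WaitForInit(MyInitGate* gate)
{
    Baselib_EventSemaphore_Acquire(&gate->ready); // returns immediately once the gate is set
}

static inline void MyInitGate_SignalInitDone(MyInitGate* gate)
{
    Baselib_EventSemaphore_Set(&gate->ready); // wakes all waiting threads
}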


@@ -0,0 +1,211 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_StaticAssert.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_EventSemaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef union BASELIB_ALIGN_AS (8) Detail_Baselib_EventSemaphore_State
{
struct
{
// Can be changed without checking for changes in numWaitingForSetInProgress (use 32bit cmpex)
int32_t numWaitingForSetAndStateFlags;
// Typically not changed without checking numWaitingForSetAndStateFlags (use 64bit cmpex)
int32_t numWaitingForSetInProgress;
} parts;
int64_t stateInt64;
} Detail_Baselib_EventSemaphore_State;
enum
{
// If this flag is set, threads are still waking up from a previous Set or ResetAndReleaseWaitingThreads call.
// While this is set, any thread entering an Acquire method (that doesn't see Detail_Baselib_EventSemaphore_SetFlag)
// will wait until it is cleared before proceeding with normal operations.
Detail_Baselib_EventSemaphore_SetInProgressFlag = (uint32_t)1 << 30,
// If this flag is set, threads acquiring the semaphore succeed immediately.
Detail_Baselib_EventSemaphore_SetFlag = (uint32_t)2 << 30,
Detail_Baselib_EventSemaphore_NumWaitingForSetMask = ~((uint32_t)(1 | 2) << 30)
};
typedef struct Baselib_EventSemaphore
{
Detail_Baselib_EventSemaphore_State state;
Baselib_SystemSemaphore_Handle setSemaphore;
Baselib_SystemSemaphore_Handle setInProgressSemaphore;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - 2 * sizeof(Baselib_SystemSemaphore_Handle) - sizeof(Detail_Baselib_EventSemaphore_State)];
char _systemSemaphoreDataSemaphore[Baselib_SystemSemaphore_PlatformSize];
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - Baselib_SystemSemaphore_PlatformSize];
char _systemSemaphoreDataInProgressSemaphore[Baselib_SystemSemaphore_PlatformSize];
} Baselib_EventSemaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_EventSemaphore, state) + PLATFORM_CACHE_LINE_SIZE) ==
offsetof(Baselib_EventSemaphore, _systemSemaphoreDataSemaphore), "state and _systemSemaphoreDataSemaphore must not share cacheline");
BASELIB_STATIC_ASSERT((offsetof(Baselib_EventSemaphore, _systemSemaphoreDataSemaphore) + PLATFORM_CACHE_LINE_SIZE) ==
offsetof(Baselib_EventSemaphore, _systemSemaphoreDataInProgressSemaphore), "_systemSemaphoreDataSemaphore and _systemSemaphoreDataInProgressSemaphore must not share cacheline");
// How (Timed)Acquire works for the SemaphoreBased EventSemaphore:
//
// If there is a set pending (Detail_Baselib_EventSemaphore_SetInProgressFlag is set),
// it means that not all threads from the previous wakeup call (either via Set or ResetAndReleaseWaitingThreads) have been woken up.
// If we just continued, we might steal the wakeup tokens of those threads! So instead we wait until they are done.
//
// This is different from the FutexBased version; however, there is no way for a user to distinguish that from
// a "regular (but lengthy)" preemption at the start of the function.
// Meaning that we don't care how often the semaphore got set and reset in the meantime!
//
//
// Invariants:
//
// Allowed flag state transitions:
// 0 -> Set | SetInProgress
// Set | SetInProgress <-> Set
// Set | SetInProgress <-> SetInProgress
// Set -> 0
// SetInProgress -> 0
//
// Additionally:
// * numWaitingForSetInProgress can only grow if SetInProgress is set.
// * numWaitingForSet can only grow if Set is set
#ifdef __cplusplus
BASELIB_C_INTERFACE
{
#endif
BASELIB_API void Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet(int32_t initialNumWaitingForSetAndStateFlags, Baselib_EventSemaphore* semaphore);
COMPILER_WARN_UNUSED_RESULT
BASELIB_API bool Detail_Baselib_EventSemaphore_SemaphoreBased_TryTimedAcquireNonSet(int32_t initialNumWaitingForSetAndStateFlags, Baselib_EventSemaphore* semaphore, uint32_t timeoutInMilliseconds);
#ifdef __cplusplus
} // BASELIB_C_INTERFACE
#endif
static FORCE_INLINE bool Detail_Baselib_EventSemaphore_IsSet(int32_t numWaitingForSetAndStateFlags)
{
return (numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_SetFlag) ? true : false;
}
static FORCE_INLINE bool Detail_Baselib_EventSemaphore_IsSetInProgress(int32_t numWaitingForSetAndStateFlags)
{
return (numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_SetInProgressFlag) ? true : false;
}
static FORCE_INLINE int32_t Detail_Baselib_EventSemaphore_GetWaitingForSetCount(int32_t numWaitingForSetAndStateFlags)
{
return numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_NumWaitingForSetMask;
}
// Changes WaitingForSet count without affecting state flags
static FORCE_INLINE int32_t Detail_Baselib_EventSemaphore_SetWaitingForSetCount(int32_t currentNumWaitingForSetAndStateFlags, int32_t newNumWaitingForSet)
{
return newNumWaitingForSet | (currentNumWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_NumWaitingForSetMask));
}
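// --- Illustrative note (not from the original sources) ---
// Example of the packing handled by the helpers above, with made-up values. The combination is
// chosen purely to show the bit layout, not to suggest a particular runtime state.
static inline void Detail_Baselib_EventSemaphore_ExamplePacking(void)
{
    const int32_t packed = 7 | Detail_Baselib_EventSemaphore_SetFlag | Detail_Baselib_EventSemaphore_SetInProgressFlag;
    BaselibAssert(Detail_Baselib_EventSemaphore_GetWaitingForSetCount(packed) == 7);
    BaselibAssert(Detail_Baselib_EventSemaphore_IsSet(packed));
    BaselibAssert(Detail_Baselib_EventSemaphore_IsSetInProgress(packed));
}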
BASELIB_INLINE_API Baselib_EventSemaphore Baselib_EventSemaphore_Create(void)
{
Baselib_EventSemaphore semaphore = {{{0, 0}}, {0}, {0}, {0}, {0}, {0}, {0}};
semaphore.setSemaphore = Baselib_SystemSemaphore_CreateInplace(semaphore._systemSemaphoreDataSemaphore);
semaphore.setInProgressSemaphore = Baselib_SystemSemaphore_CreateInplace(semaphore._systemSemaphoreDataInProgressSemaphore);
return semaphore;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryAcquire(Baselib_EventSemaphore* semaphore)
{
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
return Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags);
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Acquire(Baselib_EventSemaphore* semaphore)
{
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
if (!Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags))
Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet(numWaitingForSetAndStateFlags, semaphore);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryTimedAcquire(Baselib_EventSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
if (!Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags))
return Detail_Baselib_EventSemaphore_SemaphoreBased_TryTimedAcquireNonSet(numWaitingForSetAndStateFlags, semaphore, timeoutInMilliseconds);
return true;
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Reset(Baselib_EventSemaphore* semaphore)
{
int32_t resetNumWaitingForSetAndStateFlags;
int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
do
{
resetNumWaitingForSetAndStateFlags = numWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_SetFlag);
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
&semaphore->state.parts.numWaitingForSetAndStateFlags,
&numWaitingForSetAndStateFlags,
resetNumWaitingForSetAndStateFlags));
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Set(Baselib_EventSemaphore* semaphore)
{
int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
int32_t numWaitingForSetAndStateFlagsSet, numWaitingForSet;
do
{
numWaitingForSetAndStateFlagsSet = numWaitingForSetAndStateFlags | Detail_Baselib_EventSemaphore_SetFlag;
numWaitingForSet = Detail_Baselib_EventSemaphore_GetWaitingForSetCount(numWaitingForSetAndStateFlags);
BaselibAssert(numWaitingForSet >= 0, "There needs to be always a non-negative amount of threads waiting for Set");
if (numWaitingForSet)
numWaitingForSetAndStateFlagsSet |= Detail_Baselib_EventSemaphore_SetInProgressFlag;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
&semaphore->state.parts.numWaitingForSetAndStateFlags,
&numWaitingForSetAndStateFlags,
numWaitingForSetAndStateFlagsSet));
if (!Detail_Baselib_EventSemaphore_IsSetInProgress(numWaitingForSetAndStateFlags) && numWaitingForSet)
Baselib_SystemSemaphore_Release(semaphore->setSemaphore, numWaitingForSet);
}
BASELIB_INLINE_API void Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(Baselib_EventSemaphore* semaphore)
{
// Note that doing a Baselib_EventSemaphore_Set & Baselib_EventSemaphore_Reset has the same observable effects, just slightly slower.
int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
int32_t resetNumWaitingForSetAndStateFlags, numWaitingForSet;
do
{
resetNumWaitingForSetAndStateFlags = numWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_SetFlag);
numWaitingForSet = Detail_Baselib_EventSemaphore_GetWaitingForSetCount(numWaitingForSetAndStateFlags);
BaselibAssert(numWaitingForSet >= 0, "There needs to be always a non-negative amount of threads waiting for Set");
if (numWaitingForSet)
resetNumWaitingForSetAndStateFlags |= Detail_Baselib_EventSemaphore_SetInProgressFlag;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
&semaphore->state.parts.numWaitingForSetAndStateFlags,
&numWaitingForSetAndStateFlags,
resetNumWaitingForSetAndStateFlags));
if (!Detail_Baselib_EventSemaphore_IsSetInProgress(numWaitingForSetAndStateFlags) && numWaitingForSet)
Baselib_SystemSemaphore_Release(semaphore->setSemaphore, numWaitingForSet);
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Free(Baselib_EventSemaphore* semaphore)
{
if (!semaphore)
return;
Baselib_SystemSemaphore_FreeInplace(semaphore->setSemaphore);
Baselib_SystemSemaphore_FreeInplace(semaphore->setInProgressSemaphore);
}


@@ -0,0 +1,150 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#include "../Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_HighCapacitySemaphore_SemaphoreBased.inl.h"
#endif
// Space out to different cache lines.
// The idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and should only touch wakeups.
// The only exception to that rule is when we hit a timeout.
typedef struct Baselib_HighCapacitySemaphore
{
int32_t wakeups;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t)];
int64_t count;
char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t)];
} Baselib_HighCapacitySemaphore;
BASELIB_INLINE_API Baselib_HighCapacitySemaphore Baselib_HighCapacitySemaphore_Create(void)
{
Baselib_HighCapacitySemaphore semaphore = {0, {0}, 0, {0}};
return semaphore;
}
BASELIB_INLINE_API bool Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(Baselib_HighCapacitySemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryAcquire(Baselib_HighCapacitySemaphore* semaphore)
{
int64_t previousCount = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_64_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Acquire(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
while (!Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore))
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
}
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryTimedAcquire(Baselib_HighCapacitySemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
if (Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int64_t count = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore));
return true;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Release(Baselib_HighCapacitySemaphore* semaphore, const uint32_t _count)
{
const int64_t count = _count;
int64_t previousCount = Baselib_atomic_fetch_add_64_release(&semaphore->count, count);
// This should only be possible if millions of threads enter this function simultaneously posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", (int32_t)previousCount, (int32_t)count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int64_t waitingThreads = -previousCount;
const int64_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
BaselibAssert(threadsToWakeup <= INT32_MAX);
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, (int32_t)threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, (int32_t)threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
return;
}
// overflow protection
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2
// this way we won't have to do clamping on every iteration
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_HighCapacitySemaphore_MaxGuaranteedCount * 2))
{
const int64_t maxCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint64_t Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t count = Baselib_atomic_exchange_64_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int64_t threadsToWakeup = -count;
BaselibAssert(threadsToWakeup <= INT32_MAX);
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, (int32_t)threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, (int32_t)threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Free(Baselib_HighCapacitySemaphore* semaphore)
{
if (!semaphore)
return;
const int64_t count = Baselib_atomic_load_64_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
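// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical fan-out: a producer publishes a whole batch of work items with one Release call
// and each worker consumes one token per item. The 64-bit count is what makes very large
// outstanding batches safe (up to Baselib_HighCapacitySemaphore_MaxGuaranteedCount).
static inline void MyWorkQueue_PublishBatch(Baselib_HighCapacitySemaphore* workAvailable, uint32_t itemCount)
{
    Baselib_HighCapacitySemaphore_Release(workAvailable, itemCount);
}

static inline void MyWorkQueue_WaitForItem(Baselib_HighCapacitySemaphore* workAvailable)
{
    Baselib_HighCapacitySemaphore_Acquire(workAvailable); // blocks until a token is available
}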


@@ -0,0 +1,126 @@
#pragma once
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_Thread.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_HighCapacitySemaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef struct Baselib_HighCapacitySemaphore
{
int64_t count;
Baselib_SystemSemaphore_Handle handle;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t) - sizeof(Baselib_SystemSemaphore_Handle)];
char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_HighCapacitySemaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_HighCapacitySemaphore, count) + PLATFORM_CACHE_LINE_SIZE) ==
offsetof(Baselib_HighCapacitySemaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
BASELIB_INLINE_API Baselib_HighCapacitySemaphore Baselib_HighCapacitySemaphore_Create(void)
{
Baselib_HighCapacitySemaphore semaphore = {0, {0}, {0}, {0}};
semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
return semaphore;
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryAcquire(Baselib_HighCapacitySemaphore* semaphore)
{
int64_t previousCount = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_64_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Acquire(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryTimedAcquire(Baselib_HighCapacitySemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
return true;
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int64_t count = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
return true;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Release(Baselib_HighCapacitySemaphore* semaphore, const uint32_t _count)
{
const int64_t count = _count;
int64_t previousCount = Baselib_atomic_fetch_add_64_release(&semaphore->count, count);
// This should only be possible if millions of threads enter this function simultaneously posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", (int32_t)previousCount, (int32_t)count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int64_t waitingThreads = -previousCount;
const int64_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
BaselibAssert(threadsToWakeup <= (int64_t)UINT32_MAX);
Baselib_SystemSemaphore_Release(semaphore->handle, (uint32_t)threadsToWakeup);
return;
}
// overflow protection
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2
// this way we won't have to do clamping on every iteration
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_HighCapacitySemaphore_MaxGuaranteedCount * 2))
{
const int64_t maxCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint64_t Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t count = Baselib_atomic_exchange_64_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int64_t threadsToWakeup = -count;
BaselibAssert(threadsToWakeup <= (int64_t)UINT32_MAX);
Baselib_SystemSemaphore_Release(semaphore->handle, (uint32_t)threadsToWakeup);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Free(Baselib_HighCapacitySemaphore* semaphore)
{
if (!semaphore)
return;
const int64_t count = Baselib_atomic_load_64_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}


@@ -0,0 +1,92 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
enum Detail_Baselib_Lock_State
{
Detail_Baselib_Lock_UNLOCKED = 0,
Detail_Baselib_Lock_LOCKED = 1,
Detail_Baselib_Lock_CONTENDED = 2,
};
typedef struct Baselib_Lock
{
int32_t state;
char _cachelineSpacer[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_Lock;
BASELIB_INLINE_API Baselib_Lock Baselib_Lock_Create(void)
{
Baselib_Lock lock = {Detail_Baselib_Lock_UNLOCKED, {0}};
return lock;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryAcquire(Baselib_Lock* lock)
{
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
do
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, Detail_Baselib_Lock_LOCKED))
return true;
}
while (previousState == Detail_Baselib_Lock_UNLOCKED);
return false;
}
BASELIB_INLINE_API void Baselib_Lock_Acquire(Baselib_Lock* lock)
{
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
do
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, previousState + 1))
break;
}
while (previousState != Detail_Baselib_Lock_CONTENDED);
while (OPTIMIZER_LIKELY(previousState != Detail_Baselib_Lock_UNLOCKED))
{
Baselib_SystemFutex_Wait(&lock->state, Detail_Baselib_Lock_CONTENDED, UINT32_MAX);
previousState = Baselib_atomic_exchange_32_relaxed(&lock->state, Detail_Baselib_Lock_CONTENDED);
}
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryTimedAcquire(Baselib_Lock* lock, const uint32_t timeoutInMilliseconds)
{
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
do
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, previousState + 1))
break;
}
while (previousState != Detail_Baselib_Lock_CONTENDED);
if (OPTIMIZER_LIKELY(previousState == Detail_Baselib_Lock_UNLOCKED))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&lock->state, Detail_Baselib_Lock_CONTENDED, timeLeft);
const int32_t previousState = Baselib_atomic_exchange_32_relaxed(&lock->state, Detail_Baselib_Lock_CONTENDED);
if (previousState == Detail_Baselib_Lock_UNLOCKED)
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
return false;
}
BASELIB_INLINE_API void Baselib_Lock_Release(Baselib_Lock* lock)
{
const int32_t previousState = Baselib_atomic_exchange_32_release(&lock->state, Detail_Baselib_Lock_UNLOCKED);
if (previousState == Detail_Baselib_Lock_CONTENDED)
Baselib_SystemFutex_Notify(&lock->state, 1, Baselib_WakeupFallbackStrategy_OneByOne);
}
BASELIB_INLINE_API void Baselib_Lock_Free(Baselib_Lock* lock)
{
}
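// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical critical section built on the futex-based lock above; MyCounter is made up.
typedef struct MyCounter
{
    Baselib_Lock lock;
    int64_t value;
} MyCounter;

static inline MyCounter MyCounter_Create(void)
{
    MyCounter counter = { Baselib_Lock_Create(), 0 };
    return counter;
}

static inline void MyCounter_Increment(MyCounter* counter)
{
    Baselib_Lock_Acquire(&counter->lock);
    counter->value++;                       // protected region
    Baselib_Lock_Release(&counter->lock);
}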


@@ -0,0 +1,46 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_CappedSemaphore.h"
typedef struct Baselib_Lock
{
Baselib_CappedSemaphore semaphore;
} Baselib_Lock;
BASELIB_INLINE_API Baselib_Lock Baselib_Lock_Create(void)
{
Baselib_Lock lock = { Baselib_CappedSemaphore_Create(1) };
uint16_t submittedTokens = Baselib_CappedSemaphore_Release(&lock.semaphore, 1);
BaselibAssert(submittedTokens == 1, "CappedSemaphore was unable to accept our token");
return lock;
}
BASELIB_INLINE_API void Baselib_Lock_Acquire(Baselib_Lock* lock)
{
Baselib_CappedSemaphore_Acquire(&lock->semaphore);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryAcquire(Baselib_Lock* lock)
{
return Baselib_CappedSemaphore_TryAcquire(&lock->semaphore);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryTimedAcquire(Baselib_Lock* lock, const uint32_t timeoutInMilliseconds)
{
return Baselib_CappedSemaphore_TryTimedAcquire(&lock->semaphore, timeoutInMilliseconds);
}
BASELIB_INLINE_API void Baselib_Lock_Release(Baselib_Lock* lock)
{
Baselib_CappedSemaphore_Release(&lock->semaphore, 1);
}
BASELIB_INLINE_API void Baselib_Lock_Free(Baselib_Lock* lock)
{
if (!lock)
return;
Baselib_CappedSemaphore_Free(&lock->semaphore);
}


@@ -0,0 +1,93 @@
#pragma once
#include "../Baselib_Lock.h"
#include "../Baselib_StaticAssert.h"
#include "../Baselib_Alignment.h"
#include "../Baselib_Thread.h"
typedef struct Baselib_ReentrantLock
{
Baselib_Lock lock;
Baselib_Thread_Id owner;
int32_t count;
} Baselib_ReentrantLock;
BASELIB_STATIC_ASSERT((BASELIB_ALIGN_OF(Baselib_ReentrantLock) + offsetof(Baselib_ReentrantLock, owner)) % sizeof(Baselib_Thread_Id) == 0, "Baselib_ReentrantLock::owner is not aligned for atomic use");
BASELIB_STATIC_ASSERT((BASELIB_ALIGN_OF(Baselib_ReentrantLock) + offsetof(Baselib_ReentrantLock, count)) % sizeof(int32_t) == 0, "Baselib_ReentrantLock::count is not aligned for atomic use");
BASELIB_INLINE_API Baselib_ReentrantLock Baselib_ReentrantLock_Create(void)
{
Baselib_ReentrantLock lock = {Baselib_Lock_Create(), Baselib_Thread_InvalidId, 0};
return lock;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_ReentrantLock_TryAcquire(Baselib_ReentrantLock* lock)
{
const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
{
if (!Baselib_Lock_TryAcquire(&lock->lock))
return false;
lock->owner = currentThreadId;
lock->count = 1;
return true;
}
lock->count++;
return true;
}
BASELIB_INLINE_API void Baselib_ReentrantLock_Acquire(Baselib_ReentrantLock* lock)
{
const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
{
Baselib_Lock_Acquire(&lock->lock);
lock->owner = currentThreadId;
lock->count = 1;
return;
}
lock->count++;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_ReentrantLock_TryTimedAcquire(Baselib_ReentrantLock* lock, const uint32_t timeoutInMilliseconds)
{
const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
{
if (!Baselib_Lock_TryTimedAcquire(&lock->lock, timeoutInMilliseconds))
return false;
lock->owner = currentThreadId;
lock->count = 1;
return true;
}
lock->count++;
return true;
}
BASELIB_INLINE_API void Baselib_ReentrantLock_Release(Baselib_ReentrantLock* lock)
{
if (lock->count > 0)
{
BaselibAssert(Baselib_atomic_load_ptr_relaxed(&lock->owner) == Baselib_Thread_GetCurrentThreadId(), "A recursive lock can only be unlocked by the locking thread");
if (OPTIMIZER_LIKELY(lock->count == 1))
{
lock->owner = Baselib_Thread_InvalidId;
lock->count = 0;
Baselib_Lock_Release(&lock->lock);
return;
}
lock->count--;
}
}
BASELIB_INLINE_API void Baselib_ReentrantLock_Free(Baselib_ReentrantLock* lock)
{
if (!lock)
return;
Baselib_Lock_Free(&lock->lock);
}
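// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical example of re-entrancy; the MyObject_* helpers are made up. The inner helper
// re-acquires the lock on the same thread, and only the outermost Release actually unlocks.
static inline void MyObject_UpdateInner(Baselib_ReentrantLock* lock)
{
    Baselib_ReentrantLock_Acquire(lock);    // same owner: count goes 1 -> 2, no blocking
    // ... touch shared state ...
    Baselib_ReentrantLock_Release(lock);    // count goes 2 -> 1, lock is still held
}

static inline void MyObject_UpdateOuter(Baselib_ReentrantLock* lock)
{
    Baselib_ReentrantLock_Acquire(lock);    // count 0 -> 1, underlying lock acquired
    MyObject_UpdateInner(lock);
    Baselib_ReentrantLock_Release(lock);    // count 1 -> 0, underlying lock released
}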


@@ -0,0 +1,152 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#include "../Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_Semaphore_SemaphoreBased.inl.h"
#endif
// Space out to different cache lines.
// The idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and should only touch wakeups.
// The only exception to that rule is when we hit a timeout.
typedef struct Baselib_Semaphore
{
int32_t wakeups;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
int32_t count;
char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_Semaphore;
BASELIB_STATIC_ASSERT(sizeof(Baselib_Semaphore) == PLATFORM_CACHE_LINE_SIZE * 2, "Baselib_Semaphore (Futex) size should match 2*cacheline size (128bytes)");
BASELIB_STATIC_ASSERT(offsetof(Baselib_Semaphore, wakeups) ==
(offsetof(Baselib_Semaphore, count) - PLATFORM_CACHE_LINE_SIZE), "Baselib_Semaphore (Futex) wakeups and count shouldn't share cacheline");
BASELIB_INLINE_API Baselib_Semaphore Baselib_Semaphore_Create(void)
{
Baselib_Semaphore semaphore = {0, {0}, 0, {0}};
return semaphore;
}
BASELIB_INLINE_API bool Detail_Baselib_Semaphore_ConsumeWakeup(Baselib_Semaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryAcquire(Baselib_Semaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_Semaphore_Acquire(Baselib_Semaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
while (!Detail_Baselib_Semaphore_ConsumeWakeup(semaphore))
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
}
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryTimedAcquire(Baselib_Semaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
if (Detail_Baselib_Semaphore_ConsumeWakeup(semaphore))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Detail_Baselib_Semaphore_ConsumeWakeup(semaphore));
return true;
}
BASELIB_INLINE_API void Baselib_Semaphore_Release(Baselib_Semaphore* semaphore, const uint16_t _count)
{
const int32_t count = _count;
int32_t previousCount = Baselib_atomic_fetch_add_32_release(&semaphore->count, count);
// This should only be possible if thousands of threads enter this function simultaneously posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", previousCount, count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
return;
}
// overflow protection
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2
// this way we won't have to do clamping on every iteration
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_Semaphore_MaxGuaranteedCount * 2))
{
const int32_t maxCount = Baselib_Semaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint32_t Baselib_Semaphore_ResetAndReleaseWaitingThreads(Baselib_Semaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_Semaphore_Free(Baselib_Semaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
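// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical producer/consumer handoff; the MyChannel_* names are made up. The producer
// releases one token per item, and a consumer blocks (with a timeout) until an item exists.
static inline void MyChannel_Post(Baselib_Semaphore* itemsAvailable)
{
    Baselib_Semaphore_Release(itemsAvailable, 1);
}

static inline bool MyChannel_WaitForItem(Baselib_Semaphore* itemsAvailable, uint32_t timeoutInMilliseconds)
{
    return Baselib_Semaphore_TryTimedAcquire(itemsAvailable, timeoutInMilliseconds);
}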


@@ -0,0 +1,126 @@
#pragma once
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_Thread.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_Semaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef struct Baselib_Semaphore
{
Baselib_SystemSemaphore_Handle handle;
int32_t count;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) - sizeof(Baselib_SystemSemaphore_Handle)];
char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_Semaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_Semaphore, count) + PLATFORM_CACHE_LINE_SIZE - sizeof(Baselib_SystemSemaphore_Handle)) ==
offsetof(Baselib_Semaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
BASELIB_INLINE_API Baselib_Semaphore Baselib_Semaphore_Create(void)
{
Baselib_Semaphore semaphore = {{0}, 0, {0}, {0}};
semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
return semaphore;
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryAcquire(Baselib_Semaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_Semaphore_Acquire(Baselib_Semaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryTimedAcquire(Baselib_Semaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
return true;
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
return true;
}
BASELIB_INLINE_API void Baselib_Semaphore_Release(Baselib_Semaphore* semaphore, const uint16_t _count)
{
const int32_t count = _count;
int32_t previousCount = Baselib_atomic_fetch_add_32_release(&semaphore->count, count);
// This should only be possible if thousands of threads enter this function simultaneously, each posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", previousCount, count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
return;
}
// Overflow protection:
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2,
// so that clamping doesn't have to be attempted on every release.
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_Semaphore_MaxGuaranteedCount * 2))
{
const int32_t maxCount = Baselib_Semaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint32_t Baselib_Semaphore_ResetAndReleaseWaitingThreads(Baselib_Semaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_Semaphore_Free(Baselib_Semaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}
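// Illustrative usage sketch, assuming only the Baselib_Semaphore API defined above; the function
// name is hypothetical, and the release side is shown inline here although in real code it would
// run on a different thread than the acquiring side.
static inline bool Detail_Example_Semaphore_TimedConsume(void)
{
    Baselib_Semaphore sem = Baselib_Semaphore_Create();

    // Producer side: hand out one token, waking at most one waiting thread.
    Baselib_Semaphore_Release(&sem, 1);

    // Consumer side: wait up to 100 milliseconds for a token; false is returned on timeout.
    const bool gotToken = Baselib_Semaphore_TryTimedAcquire(&sem, 100);

    // Destruction requires that no thread is still waiting on the semaphore.
    Baselib_Semaphore_Free(&sem);
    return gotToken;
}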


@@ -0,0 +1,194 @@
#pragma once
#include "../../../C/Baselib_Atomic.h"
#include "../../../C/Baselib_Atomic_Macros.h"
#include "Baselib_Atomic_Gcc_Apple_LLVM_Patch.h"
#if COMPILER_GCC && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 7))
#pragma message "GNUC: " PP_STRINGIZE(__GNUC__) " GNUC_MINOR: " PP_STRINGIZE(__GNUC_MINOR__)
#error "GCC is too old and/or missing compatible atomic built-in functions" PP_STRINGIZE(__GNUC__)
#endif
#define detail_intrinsic_relaxed __ATOMIC_RELAXED
#define detail_intrinsic_acquire __ATOMIC_ACQUIRE
#define detail_intrinsic_release __ATOMIC_RELEASE
#define detail_intrinsic_acq_rel __ATOMIC_ACQ_REL
#define detail_intrinsic_seq_cst __ATOMIC_SEQ_CST
// Patch gcc and clang intrinsics to achieve a sequentially consistent barrier.
// As of writing (Clang 9, GCC 9), neither produces a seq_cst barrier for load-store operations.
// To fix this we switch load-store operations to acquire-release and issue a full barrier afterwards.
#define detail_ldst_intrinsic_relaxed detail_intrinsic_relaxed
#define detail_ldst_intrinsic_acquire detail_intrinsic_acquire
#define detail_ldst_intrinsic_release detail_intrinsic_release
#define detail_ldst_intrinsic_acq_rel detail_intrinsic_acq_rel
#define detail_ldst_intrinsic_seq_cst detail_intrinsic_seq_cst
#if defined(__aarch64__)
#undef detail_ldst_intrinsic_seq_cst
#define detail_ldst_intrinsic_seq_cst __ATOMIC_ACQ_REL
#define detail_AARCH64_SEQCST_PATCH_BARRIER_relaxed
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acquire
#define detail_AARCH64_SEQCST_PATCH_BARRIER_release
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acq_rel
#define detail_AARCH64_SEQCST_PATCH_BARRIER_seq_cst __extension__({__atomic_thread_fence (__ATOMIC_SEQ_CST); });
#else
#define detail_AARCH64_SEQCST_PATCH_BARRIER_relaxed
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acquire
#define detail_AARCH64_SEQCST_PATCH_BARRIER_release
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acq_rel
#define detail_AARCH64_SEQCST_PATCH_BARRIER_seq_cst
#endif
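// Net effect of the patch above (illustrative): on AArch64 a seq_cst load-store operation such as
// Baselib_atomic_fetch_add_32_seq_cst_v is emitted as an acq_rel RMW followed by a full
// __atomic_thread_fence(__ATOMIC_SEQ_CST); on every other architecture the intrinsic keeps its
// native seq_cst ordering and the extra barrier macro expands to nothing.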
#define detail_THREAD_FENCE(order, ...) \
static FORCE_INLINE void Baselib_atomic_thread_fence_##order(void) \
{ \
__extension__({__atomic_thread_fence (detail_intrinsic_##order); }); \
}
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
__extension__({ __atomic_load((int_type*)obj, (int_type*)result, detail_intrinsic_##order); }); \
}
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
__extension__({ __atomic_load((int_type*)obj, (int_type*)result, detail_intrinsic_##order); }); \
}
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
__extension__({ __atomic_store((int_type*)obj, (int_type*)value, detail_intrinsic_##order); }); \
}
#define detail_ALU(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
*(int_type*)result = __extension__({ __atomic_##op((int_type*)obj, *(int_type*)value, detail_ldst_intrinsic_##order); });\
detail_AARCH64_SEQCST_PATCH_BARRIER_##order; \
}
#define detail_XCHG(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
__extension__({ __atomic_exchange((int_type*)obj, (int_type*)value, (int_type*)result, detail_ldst_intrinsic_##order); });\
detail_AARCH64_SEQCST_PATCH_BARRIER_##order; \
}
#define detail_CMP_XCHG_WEAK(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value); \
bool result = __extension__({ __atomic_compare_exchange( \
(int_type*)obj, \
(int_type*)expected, \
(int_type*)value, \
1, \
detail_ldst_intrinsic_##order1, \
detail_ldst_intrinsic_##order2); \
}); \
if (result) { detail_AARCH64_SEQCST_PATCH_BARRIER_##order1; } \
else { detail_AARCH64_SEQCST_PATCH_BARRIER_##order2;} \
return result; \
}
#define detail_CMP_XCHG_STRONG(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value); \
bool result = __extension__ ({ __atomic_compare_exchange( \
(int_type*)obj, \
(int_type*)expected, \
(int_type*)value, \
0, \
detail_ldst_intrinsic_##order1, \
detail_ldst_intrinsic_##order2); \
}); \
if (result) { detail_AARCH64_SEQCST_PATCH_BARRIER_##order1; } \
else { detail_AARCH64_SEQCST_PATCH_BARRIER_##order2;} \
return result; \
}
#define detail_NOT_SUPPORTED(...)
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
detail_THREAD_FENCE
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_TYPE(
detail_LOAD, // load
detail_STORE, // store
detail_ALU, // add
detail_ALU, // and
detail_ALU, // or
detail_ALU, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
)
#if PLATFORM_ARCH_64
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
128, 128, __int128 // type information
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
ptr2x, 128, __int128 // type information
)
#else
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
ptr2x, 64, int64_t // type information
)
#endif
#undef detail_intrinsic_relaxed
#undef detail_intrinsic_acquire
#undef detail_intrinsic_release
#undef detail_intrinsic_acq_rel
#undef detail_intrinsic_seq_cst
#undef detail_THREAD_FENCE
#undef detail_LOAD
#undef detail_LOAD_NOT_CONST
#undef detail_STORE
#undef detail_ALU
#undef detail_XCHG
#undef detail_CMP_XCHG_WEAK
#undef detail_CMP_XCHG_STRONG
#undef detail_NOT_SUPPORTED
#include "Baselib_Atomic_Gcc_Apple_LLVM_Patch_PostInclude.h"


@@ -0,0 +1,142 @@
#pragma once
#if PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
//
// Patch for Apple LLVM version 8.x.x (clang-800.0.38 - clang-900.0.37) intrinsic 128-bit __atomic_compare_exchange implementation (debug, using opt level -O0).
// Note that this patch is only in effect on tvOS/iOS AArch64 debug builds with Apple LLVM version 8.x.x. Arm32 has been verified to work without the patch.
//
// Problem:
// For the above builds, the __atomic_compare_exchange asm expansion uses SUBS/SBCS to compare the pair of "obj" and "expected" values.
// SUBS/SBCS does not provide sufficient NZCV flags for comparing two 64-bit values.
// The result is an erroneous comparison of "obj" and "expected". Some examples:
//
// -- fails (lo != lo && hi == hi)
// obj.lo = 5;
// obj.hi = 10;
// expected.lo = 3;
// expected.hi = 10;
//
// -- works (expected.lo < 0)
// obj.lo = 5;
// obj.hi = 20;
// expected.lo = -3;
// expected.hi = 20;
//
// -- fails (obj.lo < 0 && hi == hi)
// obj.lo = -5;
// obj.hi = 30;
// expected.lo = 3;
// expected.hi = 30;
//
// -- fails (expected.lo < 0 && obj.hi+1 == expected.hi)
// obj.lo = 5;
// obj.hi = 3;
// expected.lo = -3;
// expected.hi = 2;
//
// Solution: Inline assembly replacement of __atomic_compare_exchange using the same approach as in release mode
//
// Note: This patch should be removed in its entirety once we require Apple LLVM version 9 (clang-900.0.37) or higher for building.
//
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ld_instr, st_instr, barrier_instr) \
{ \
register bool result asm ("w0"); \
asm volatile \
( \
" ldp x12, x13, [%x4] ; load expected \n" \
" ldp x10, x11, [%x5] ; load value \n" \
" " #ld_instr " x9, x8, [%x3] ; load obj \n" \
" eor x13, x8, x13 ; compare to expected \n" \
" eor x12, x9, x12 \n" \
" orr x12, x12, x13 \n" \
" cbnz x12, 0f ; not equal = no store \n" \
" " #st_instr " w12, x10, x11, [%x0] ; try store \n" \
" cbnz w12, 1f \n" \
" orr w0, wzr, #0x1 ; success, result in w0 \n" \
" b 2f \n" \
"0: ; no store \n" \
" clrex \n" \
"1: ; failed store \n" \
" movz w0, #0 \n" \
"2: ; store expected, fail \n" \
" tbnz w0, #0, 3f \n" \
" stp x9, x8, [%x1] \n" \
"3: \n" \
" " #barrier_instr " \n" \
\
: "+r" (obj), "+r" (expected), "=r" (result) \
: "r" (obj), "r" (expected), "r" (value) \
: "x8", "x9", "x10", "x11", "x12", "x13", "cc", "memory"); \
\
return result != 0; \
}
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_relaxed_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_release_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_seq_cst(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ld_instr, st_instr, barrier_instr) \
{ \
register bool result asm ("w0"); \
asm volatile \
( \
" ldp x10, x11, [%x4] ; load expected \n" \
" ldp x12, x13, [%x5] ; load value \n" \
"0: \n" \
" " #ld_instr " x9, x8, [%x3] ; load obj (ldxp/ldaxp) \n" \
" eor x14, x8, x11 ; compare to expected \n" \
" eor x15, x9, x10 \n" \
" orr x14, x15, x14 \n" \
" cbnz x14, 1f ; not equal = no store \n" \
" " #st_instr " w14, x12, x13, [%x0] ; try store (stxp/stlxp) \n" \
" cbnz w14, 0b ; retry or store result in w0 \n" \
" orr w0, wzr, #0x1 \n" \
" b 2f \n" \
"1: ; no store \n" \
" movz w0, #0 \n" \
" clrex \n" \
"2: ; store expected on fail \n" \
" tbnz w0, #0, 3f \n" \
" stp x9, x8, [%x1] \n" \
"3: \n" \
" " #barrier_instr " \n" \
\
: "+r" (obj), "+r" (expected), "=r" (result) \
: "r" (obj), "r" (expected), "r" (value) \
: "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "cc", "memory"); \
\
return result != 0; \
}
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_relaxed_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_release_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_seq_cst(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value) \
if(sizeof(int_type) == 16) \
detail_APPLE_LLVM_CMP_XCHG_WEAK_128_##order1##_##order2(obj, expected, value);
#define detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value) \
if(sizeof(int_type) == 16) \
detail_APPLE_LLVM_CMP_XCHG_STRONG_128_##order1##_##order2(obj, expected, value);
#else // PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
#define detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(...)
#define detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(...)
#endif
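// Illustrative sanity-check sketch one could run in a test, assuming the 128-bit compare-exchange
// generated by the Gcc header (Baselib_atomic_compare_exchange_weak_128_seq_cst_seq_cst_v); the
// Detail_Example_* names are hypothetical. It exercises the first failing pattern listed above
// (lo differs, hi matches), which unpatched Apple LLVM 8.x debug codegen would wrongly treat as equal.
typedef union Detail_Example_Pair128
{
    __int128 storage;                 // guarantees the 16-byte size/alignment the exclusive pair ops need
    struct { uint64_t lo, hi; } u64;
} Detail_Example_Pair128;

static FORCE_INLINE bool Detail_Example_CmpXchg128_LoMismatchMustFail(void)
{
    Detail_Example_Pair128 obj, expected, desired;
    obj.u64.lo = 5;       obj.u64.hi = 10;
    expected.u64.lo = 3;  expected.u64.hi = 10;
    desired.u64.lo = 7;   desired.u64.hi = 10;

    // Must fail: obj.lo != expected.lo even though the hi halves match. On failure "expected" is
    // updated to the value observed in "obj".
    const bool exchanged = Baselib_atomic_compare_exchange_weak_128_seq_cst_seq_cst_v(&obj, &expected, &desired);
    return !exchanged && expected.u64.lo == 5 && expected.u64.hi == 10;
}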


@@ -0,0 +1,30 @@
#pragma once
#if PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_relaxed_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_release_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_seq_cst
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_relaxed_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_release_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_seq_cst
#undef detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH
#undef detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH
#endif


@@ -0,0 +1,40 @@
#pragma once
// Arm exclusive state access break implementation
#define detail_Baselib_atomic_llsc_break() __builtin_arm_clrex()
// Arm exclusive LLSC implementation using intrinsics.
#define detail_Baselib_atomic_llsc_arm_ts(obj, expected, value, code, ll_instr, sc_instr, load_barrier, store_barrier) \
do { \
do { \
*expected = __builtin_arm_##ll_instr(obj); \
load_barrier; \
code; \
} while (__builtin_arm_##sc_instr(*value, obj)); \
store_barrier; \
} while (false)
#define detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ll_instr, sc_instr, loadbarrier, storebarrier) \
detail_Baselib_atomic_llsc_arm_ts((int_type*)((void*)obj), \
(int_type*)((void*)expected), \
(int_type*)((void*)value), \
code, ll_instr, sc_instr, loadbarrier, storebarrier)
#define detail_Baselib_atomic_llsc_relaxed_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, ,)
#if PLATFORM_ARCH_64
#define detail_Baselib_atomic_llsc_acquire_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, strex, ,)
#define detail_Baselib_atomic_llsc_relaxed_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, stlex, ,)
#define detail_Baselib_atomic_llsc_acquire_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, stlex, ,)
#define detail_Baselib_atomic_llsc_seq_cst_seq_cst_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, stlex, , __builtin_arm_dmb(11) )
#else
#define detail_Baselib_atomic_llsc_acquire_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11), )
#define detail_Baselib_atomic_llsc_relaxed_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, ,__builtin_arm_dmb(11) )
#define detail_Baselib_atomic_llsc_acquire_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11) , __builtin_arm_dmb(11) )
#define detail_Baselib_atomic_llsc_seq_cst_seq_cst_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11) , __builtin_arm_dmb(11) )
#endif
#define detail_Baselib_atomic_llsc_v(obj, expected, value, code, size, loadbarrier, storebarrier) \
detail_Baselib_atomic_llsc_##loadbarrier##_##storebarrier##_v(obj, expected, value, code, int##size##_t)
#define detail_Baselib_atomic_llsc_128_v(obj, expected, value, code, loadbarrier, storebarrier) \
detail_Baselib_atomic_llsc_##loadbarrier##_##storebarrier##_v(obj, expected, value, code, __int128)
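// Illustrative sketch, assuming an Arm target where the __builtin_arm_* exclusives used above are
// available; the Detail_Example_* name is hypothetical. It builds an acquire-release fetch-add
// directly on the LLSC helper: "code" computes the value to store from the value just loaded.
static inline int32_t Detail_Example_LlscFetchAdd32_AcqRel(int32_t* obj, int32_t add)
{
    int32_t loaded, desired;
    detail_Baselib_atomic_llsc_v(obj, &loaded, &desired, { desired = loaded + add; }, 32, acquire, release);
    return loaded; // value observed by the load-exclusive of the successful iteration
}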


@@ -0,0 +1,358 @@
#pragma once
#include "../../../C/Baselib_Atomic.h"
#include "../../../C/Baselib_Atomic_Macros.h"
#include "Baselib_Atomic_MsvcIntrinsics.h"
#define detail_relaxed_relaxed(...) __VA_ARGS__
#define detail_relaxed_acquire(...)
#define detail_relaxed_release(...)
#define detail_relaxed_acq_rel(...)
#define detail_relaxed_seq_cst(...)
#define detail_acquire_relaxed(...)
#define detail_acquire_acquire(...) __VA_ARGS__
#define detail_acquire_release(...)
#define detail_acquire_acq_rel(...)
#define detail_acquire_seq_cst(...)
#define detail_release_relaxed(...)
#define detail_release_acquire(...)
#define detail_release_release(...) __VA_ARGS__
#define detail_release_acq_rel(...)
#define detail_release_seq_cst(...)
#define detail_acq_rel_relaxed(...)
#define detail_acq_rel_acquire(...)
#define detail_acq_rel_release(...)
#define detail_acq_rel_acq_rel(...) __VA_ARGS__
#define detail_acq_rel_seq_cst(...)
#define detail_seq_cst_relaxed(...)
#define detail_seq_cst_acquire(...)
#define detail_seq_cst_release(...)
#define detail_seq_cst_acq_rel(...)
#define detail_seq_cst_seq_cst(...) __VA_ARGS__
#define detail_relaxed(memory_order, ...) detail_relaxed_##memory_order(__VA_ARGS__)
#define detail_acquire(memory_order, ...) detail_acquire_##memory_order(__VA_ARGS__)
#define detail_release(memory_order, ...) detail_release_##memory_order(__VA_ARGS__)
#define detail_acq_rel(memory_order, ...) detail_acq_rel_##memory_order(__VA_ARGS__)
#define detail_seq_cst(memory_order, ...) detail_seq_cst_##memory_order(__VA_ARGS__)
// Intel
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)
#define detail_intrinsic_relaxed
#define detail_intrinsic_acquire
#define detail_intrinsic_release
#define detail_intrinsic_acq_rel
#define detail_intrinsic_seq_cst
#if defined(_M_X64)
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
detail_acquire(order, _ReadWriteBarrier()); \
detail_release(order, _ReadWriteBarrier()); \
detail_acq_rel(order, _ReadWriteBarrier()); \
detail_seq_cst(order, __faststorefence()); \
}
#else // #defined(_M_IX86)
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
detail_acquire(order, _ReadWriteBarrier()); \
detail_release(order, _ReadWriteBarrier()); \
detail_acq_rel(order, _ReadWriteBarrier()); \
detail_seq_cst(order, _ReadWriteBarrier(); __int32 temp = 0; _InterlockedExchange32(&temp, 0); _ReadWriteBarrier()); \
}
#endif
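// Note: 32-bit x86 has no __faststorefence, so the seq_cst fence above is emulated with a locked
// exchange to a dummy stack location, bracketed by compiler barriers to keep the compiler from
// reordering around it.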
#define detail_LOAD_BITS_8(obj, result) *(__int8*)result = *(const volatile __int8*)obj
#define detail_LOAD_BITS_16(obj, result) *(__int16*)result = *(const volatile __int16*)obj
#define detail_LOAD_BITS_32(obj, result) *(__int32*)result = *(const volatile __int32*)obj
#if PLATFORM_ARCH_64
#define detail_LOAD_BITS_64(obj, result) *(__int64*)result = *(const volatile __int64*)obj
#else
// x86 32-bit load/store of a 64-bit integer.
// - With SSE2 enabled this yields (identical to __mm_store/load):
// movsd xmm0, QWORD PTR unsigned __int64 obj
// movsd QWORD PTR unsigned __int64 result, xmm0
// - Without SSE2 enabled it yields:
// fld QWORD PTR unsigned __int64 obj
// fstp QWORD PTR unsigned __int64 result
// Link comparing various implementations: https://godbolt.org/z/T3zW5M
#define detail_LOAD_BITS_64(obj, result) *(double*)result = *(const volatile double*)obj
#endif
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
detail_LOAD_BITS_##bits(obj, result); \
detail_acquire(order, _ReadWriteBarrier()); \
detail_seq_cst(order, _ReadWriteBarrier()); \
}
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
detail_LOAD_BITS_##bits(obj, result); \
detail_acquire(order, _ReadWriteBarrier()); \
detail_seq_cst(order, _ReadWriteBarrier()); \
}
#define detail_STORE_BITS_8(obj, value) *(volatile __int8*)obj = *(const __int8*)value
#define detail_STORE_BITS_16(obj, value) *(volatile __int16*)obj = *(const __int16*)value
#define detail_STORE_BITS_32(obj, value) *(volatile __int32*)obj = *(const __int32*)value
#if PLATFORM_ARCH_64
#define detail_STORE_BITS_64(obj, value) *(volatile __int64*)obj = *(const __int64*)value
#else
#define detail_STORE_BITS_64(obj, value) *(volatile double*)obj = *(double*)value
#endif
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
detail_relaxed(order, detail_STORE_BITS_##bits(obj, value)); \
detail_release(order, detail_STORE_BITS_##bits(obj, value); _ReadWriteBarrier()); \
detail_seq_cst(order, _InterlockedExchange##bits((__int##bits*)obj, *(const __int##bits*)value)); \
}
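// Illustrative expansion, assuming the FOR_EACH macros instantiate id = 32 / bits = 32: for seq_cst
// only the detail_seq_cst branch survives, so the store becomes a locked exchange, while the relaxed
// and release variants stay plain volatile stores (release adds a trailing compiler barrier):
//
//   static FORCE_INLINE void Baselib_atomic_store_32_seq_cst_v(void* obj, const void* value)
//   {
//       _InterlockedExchange32((__int32*)obj, *(const __int32*)value);
//   }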
// ARM
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#elif defined(_M_ARM) || defined(_M_ARM64)
#define detail_intrinsic_relaxed _nf
#define detail_intrinsic_acquire _acq
#define detail_intrinsic_release _rel
#define detail_intrinsic_acq_rel
#define detail_intrinsic_seq_cst
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
detail_release(order, __dmb(_ARM_BARRIER_ISH)); \
detail_acq_rel(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
*(__int##bits*)result = __iso_volatile_load##bits((const __int##bits*)obj); \
detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
*(__int##bits*)result = __iso_volatile_load##bits((const __int##bits*)obj); \
detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
detail_release(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
__iso_volatile_store##bits((__int##bits*) obj, *(const __int##bits*)value); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#endif
// Common
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#define detail_intrinsic_exchange _InterlockedExchange
#define detail_intrinsic_fetch_add _InterlockedExchangeAdd
#define detail_intrinsic_fetch_and _InterlockedAnd
#define detail_intrinsic_fetch_or _InterlockedOr
#define detail_intrinsic_fetch_xor _InterlockedXor
#define detail_LOAD_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
*(__int##bits##*)result = PP_CONCAT(detail_intrinsic_##op, bits, detail_intrinsic_##order)((__int##bits##*)obj, *(const __int##bits##*)value); \
}
#define detail_CMP_XCHG(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
__int##bits cmp = *(__int##bits##*)expected; \
__int##bits result = PP_CONCAT(_InterlockedCompareExchange, bits, detail_intrinsic_##order1)((__int##bits##*)obj, *(__int##bits##*)value, cmp); \
return result == cmp ? true : (*(__int##bits##*)expected = result, false); \
}
#define detail_NOT_SUPPORTED(...)
// Setup implementation
// ------------------------------------------------------------------------------------------------------------------------------------------------------
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
detail_THREAD_FENCE
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_TYPE(
detail_LOAD, // load
detail_STORE, // store
detail_LOAD_STORE, // add
detail_LOAD_STORE, // and
detail_LOAD_STORE, // or
detail_LOAD_STORE, // xor
detail_LOAD_STORE, // exchange
detail_CMP_XCHG, // compare_exchange_weak
detail_CMP_XCHG // compare_exchange_strong
)
#if PLATFORM_ARCH_64
// 128-bit implementation
// There are more efficient ways of doing 128-bit load, store and exchange on Arm64, but MSVC provides no intrinsics for them,
// nor any other way to emit the specific instructions required for atomic load, store and exchange.
// Hence we fall back to cmpxchg for all atomic ops.
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#define detail_LOAD128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
Baselib_atomic_compare_exchange_weak_128_##order##_##order##_v((void*)obj, result, result); \
}
#define detail_STORE128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
uint64_t comparand[2] = { ((volatile uint64_t*)obj)[0], ((volatile uint64_t*)obj)[1] }; \
while(!Baselib_atomic_compare_exchange_weak_128_##order##_relaxed_v(obj, comparand, value)) \
; \
}
#define detail_XCHG128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
((uint64_t*)result)[0] = ((volatile uint64_t*)obj)[0]; \
((uint64_t*)result)[1] = ((volatile uint64_t*)obj)[1]; \
while(!Baselib_atomic_compare_exchange_weak_128_##order##_relaxed_v(obj, result, value)) \
; \
}
#define detail_CMP_XCHG128(op, order1, order2, id, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
return PP_CONCAT(_InterlockedCompareExchange128, detail_intrinsic_##order1)( \
(__int64*)obj, \
((const __int64*)value)[1], \
((const __int64*)value)[0], \
(__int64*)expected \
) == 1; \
}
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD128, // load
detail_STORE128, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG128, // exchange
detail_CMP_XCHG128, // compare_exchange_weak
detail_CMP_XCHG128, // compare_exchange_strong
128
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD128, // load
detail_STORE128, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG128, // exchange
detail_CMP_XCHG128, // compare_exchange_weak
detail_CMP_XCHG128, // compare_exchange_strong
ptr2x
)
#undef detail_LOAD128
#undef detail_STORE128
#undef detail_XCHG128
#undef detail_CMP_XCHG128
#else
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_LOAD_STORE, // exchange
detail_CMP_XCHG, // compare_exchange_weak
detail_CMP_XCHG, // compare_exchange_strong
ptr2x, 64, int64_t
)
#endif
#undef detail_THREAD_FENCE
#undef detail_LOAD
#undef detail_LOAD_NOT_CONST
#undef detail_STORE
#undef detail_LOAD_STORE
#undef detail_CMP_XCHG
#undef detail_NOT_SUPPORTED
#undef detail_LOAD_BITS_8
#undef detail_LOAD_BITS_16
#undef detail_LOAD_BITS_32
#undef detail_LOAD_BITS_64
#undef detail_STORE_BITS_8
#undef detail_STORE_BITS_16
#undef detail_STORE_BITS_32
#undef detail_STORE_BITS_64
#undef detail_intrinsic_exchange
#undef detail_intrinsic_fetch_add
#undef detail_intrinsic_fetch_and
#undef detail_intrinsic_fetch_or
#undef detail_intrinsic_fetch_xor
#undef detail_relaxed_relaxed
#undef detail_relaxed_acquire
#undef detail_relaxed_release
#undef detail_relaxed_acq_rel
#undef detail_relaxed_seq_cst
#undef detail_acquire_relaxed
#undef detail_acquire_acquire
#undef detail_acquire_release
#undef detail_acquire_acq_rel
#undef detail_acquire_seq_cst
#undef detail_release_relaxed
#undef detail_release_acquire
#undef detail_release_release
#undef detail_release_acq_rel
#undef detail_release_seq_cst
#undef detail_acq_rel_relaxed
#undef detail_acq_rel_acquire
#undef detail_acq_rel_release
#undef detail_acq_rel_acq_rel
#undef detail_acq_rel_seq_cst
#undef detail_seq_cst_relaxed
#undef detail_seq_cst_acquire
#undef detail_seq_cst_release
#undef detail_seq_cst_acq_rel
#undef detail_seq_cst_seq_cst
#undef detail_relaxed
#undef detail_acquire
#undef detail_release
#undef detail_acq_rel
#undef detail_seq_cst


@@ -0,0 +1,58 @@
#pragma once
#include <intrin.h>
#ifndef _ARM_BARRIER_ISH
#define _ARM_BARRIER_ISH 0xB
#endif
#define _InterlockedCompareExchange32(obj, value, exp) _InterlockedCompareExchange((long*)obj, value, exp)
#define _InterlockedCompareExchange32_nf(obj, value, exp) _InterlockedCompareExchange_nf((long*)obj, value, exp)
#define _InterlockedCompareExchange32_acq(obj, value, exp) _InterlockedCompareExchange_acq((long*)obj, value, exp)
#define _InterlockedCompareExchange32_rel(obj, value, exp) _InterlockedCompareExchange_rel((long*)obj, value, exp)
#define _InterlockedExchange32(obj, value) _InterlockedExchange((long*)obj, value)
#define _InterlockedExchange32_nf(obj, value) _InterlockedExchange_nf((long*)obj, value)
#define _InterlockedExchange32_acq(obj, value) _InterlockedExchange_acq((long*)obj, value)
#define _InterlockedExchange32_rel(obj, value) _InterlockedExchange_rel((long*)obj, value)
#define _InterlockedExchangeAdd32(obj, value) _InterlockedExchangeAdd((long*)obj, value)
#define _InterlockedExchangeAdd32_nf(obj, value) _InterlockedExchangeAdd_nf((long*)obj, value)
#define _InterlockedExchangeAdd32_acq(obj, value) _InterlockedExchangeAdd_acq((long*)obj, value)
#define _InterlockedExchangeAdd32_rel(obj, value) _InterlockedExchangeAdd_rel((long*)obj, value)
#define _InterlockedAnd32(obj, value) _InterlockedAnd((long*)obj, value)
#define _InterlockedAnd32_nf(obj, value) _InterlockedAnd_nf((long*)obj, value)
#define _InterlockedAnd32_acq(obj, value) _InterlockedAnd_acq((long*)obj, value)
#define _InterlockedAnd32_rel(obj, value) _InterlockedAnd_rel((long*)obj, value)
#define _InterlockedOr32(obj, value) _InterlockedOr((long*)obj, value)
#define _InterlockedOr32_nf(obj, value) _InterlockedOr_nf((long*)obj, value)
#define _InterlockedOr32_acq(obj, value) _InterlockedOr_acq((long*)obj, value)
#define _InterlockedOr32_rel(obj, value) _InterlockedOr_rel((long*)obj, value)
#define _InterlockedXor32(obj, value) _InterlockedXor((long*)obj, value)
#define _InterlockedXor32_nf(obj, value) _InterlockedXor_nf((long*)obj, value)
#define _InterlockedXor32_acq(obj, value) _InterlockedXor_acq((long*)obj, value)
#define _InterlockedXor32_rel(obj, value) _InterlockedXor_rel((long*)obj, value)
// Use cmpxchg on x86 to emulate 64-bit exchange and ALU ops
#if defined(_M_IX86)
#undef _InterlockedExchange64
#undef _InterlockedExchangeAdd64
#undef _InterlockedOr64
#undef _InterlockedAnd64
#undef _InterlockedXor64
#define detail_CAS_OP(_name, ...) \
static __forceinline __int64 _name(__int64* obj, __int64 value) \
{ \
__int64 p1, p2 = *obj; \
do { p1 = p2; p2 = _InterlockedCompareExchange64(obj, (__VA_ARGS__), p1); } while (p1 != p2); \
return p1; \
}
detail_CAS_OP(_InterlockedExchange64, value);
detail_CAS_OP(_InterlockedExchangeAdd64, p1 + value);
detail_CAS_OP(_InterlockedOr64, p1 | value);
detail_CAS_OP(_InterlockedAnd64, p1 & value);
detail_CAS_OP(_InterlockedXor64, p1 ^ value);
#undef detail_CAS_OP
#endif
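// Illustrative expansion of the block above: detail_CAS_OP(_InterlockedExchangeAdd64, p1 + value)
// defines a 64-bit fetch-add on 32-bit x86 in terms of _InterlockedCompareExchange64, roughly:
//
//   static __forceinline __int64 _InterlockedExchangeAdd64(__int64* obj, __int64 value)
//   {
//       __int64 p1, p2 = *obj;
//       do { p1 = p2; p2 = _InterlockedCompareExchange64(obj, p1 + value, p1); } while (p1 != p2);
//       return p1; // previous value, matching the semantics of the real intrinsic
//   }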