[add] first
This commit is contained in:
281
Libraries/external/baselib/Include/Cpp/Algorithm.h
vendored
Normal file
281
Libraries/external/baselib/Include/Cpp/Algorithm.h
vendored
Normal file
@@ -0,0 +1,281 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <limits>
|
||||
#include "Internal/TypeTraits.h"
|
||||
#include "Internal/Algorithm.inl.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace Algorithm
|
||||
{
|
||||
// Index of the most significant bit in a 32bit mask. Returns -1 if no bits are set.
|
||||
inline int HighestBit(uint32_t value);
|
||||
// Index of the most significant bit in a 32bit mask of size_t value. Returns -1 if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 4, bool>::type = 0>
|
||||
inline int HighestBit(T value) { return HighestBit(static_cast<uint32_t>(value)); }
|
||||
|
||||
// Index of the most significant bit in a 64bit mask. Returns -1 if no bits are set.
|
||||
inline int HighestBit(uint64_t value);
|
||||
// Index of the most significant bit in a 64bit mask of size_t value. Returns -1 if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 8, bool>::type = 0>
|
||||
inline int HighestBit(T value) { return HighestBit(static_cast<uint64_t>(value)); }
|
||||
|
||||
// Index of the most significant bit in a 32bit mask. Unspecified result if no bits are set.
|
||||
inline int HighestBitNonZero(uint32_t value);
|
||||
// Index of the most significant bit in a 32bit mask of size_t value. Unspecified result if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 4, bool>::type = 0>
|
||||
inline int HighestBitNonZero(T value) { return HighestBitNonZero(static_cast<uint32_t>(value)); }
|
||||
|
||||
// Index of the most significant bit in a 64bit mask. Unspecified result if no bits are set.
|
||||
inline int HighestBitNonZero(uint64_t value);
|
||||
// Index of the most significant bit in a 64bit mask of size_t value. Unspecified result if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 8, bool>::type = 0>
|
||||
inline int HighestBitNonZero(T value) { return HighestBitNonZero(static_cast<uint64_t>(value)); }
|
||||
|
||||
// Index of the least significant bit in a 32bit mask. Returns -1 if no bits are set.
|
||||
inline int LowestBit(uint32_t value);
|
||||
// Index of the least significant bit in a 32bit mask of size_t value. Returns -1 if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 4, bool>::type = 0>
|
||||
inline int LowestBit(T value) { return LowestBit(static_cast<uint32_t>(value)); }
|
||||
|
||||
// Index of the least significant bit in a 64bit mask. Returns -1 if no bits are set.
|
||||
inline int LowestBit(uint64_t value);
|
||||
// Index of the least significant bit in a 64bit mask of size_t value. Returns -1 if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 8, bool>::type = 0>
|
||||
inline int LowestBit(T value) { return LowestBit(static_cast<uint64_t>(value)); }
|
||||
|
||||
// Index of the least significant bit in a 32bit mask. Unspecified result if no bits are set.
|
||||
inline int LowestBitNonZero(uint32_t value);
|
||||
// Index of the least significant bit in a 32bit mask of size_t value. Unspecified result if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 4, bool>::type = 0>
|
||||
inline int LowestBitNonZero(T value) { return LowestBitNonZero(static_cast<uint32_t>(value)); }
|
||||
|
||||
// Index of the least significant bit in a 64bit mask. Unspecified result if no bits are set.
|
||||
inline int LowestBitNonZero(uint64_t value);
|
||||
// Index of the least significant bit in a 64bit mask of size_t value. Unspecified result if no bits are set.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 8, bool>::type = 0>
|
||||
inline int LowestBitNonZero(T value) { return LowestBitNonZero(static_cast<uint64_t>(value)); }
|
||||
|
||||
// Returns number of set bits in a 64 bit mask.
|
||||
inline int BitsInMask(uint64_t mask);
|
||||
// Returns number of set bits in a 32 bit mask.
|
||||
inline int BitsInMask(uint32_t mask);
|
||||
// Returns number of set bits in a 16 bit mask.
|
||||
inline int BitsInMask(uint16_t mask);
|
||||
// Returns number os set bits in a 8 bit mask.
|
||||
inline int BitsInMask(uint8_t mask);
|
||||
|
||||
// Number of set bits (population count) in an array of known size.
|
||||
// Using Robert Harley and David Seal's algorithm from Hacker's Delight,
|
||||
// variant that does 4 words in a loop iteration.
|
||||
// http://www.hackersdelight.org/revisions.pdf
|
||||
// http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
|
||||
template<typename WordT, int WordCount>
|
||||
inline int BitsInArray(const WordT* data)
|
||||
{
|
||||
#define HarleySealCSAStep(h, l, a, b, c) {\
|
||||
WordT u = a ^ b; \
|
||||
h = (a & b) | (u & c); l = u ^ c; \
|
||||
}
|
||||
WordT ones, twos, twosA, twosB, fours;
|
||||
|
||||
int i = 0;
|
||||
int tot = 0;
|
||||
twos = ones = 0;
|
||||
for (; i <= WordCount - 4; i = i + 4)
|
||||
{
|
||||
HarleySealCSAStep(twosA, ones, ones, data[i], data[i + 1])
|
||||
HarleySealCSAStep(twosB, ones, ones, data[i + 2], data[i + 3])
|
||||
HarleySealCSAStep(fours, twos, twos, twosA, twosB)
|
||||
tot = tot + BitsInMask(fours);
|
||||
}
|
||||
tot = 4 * tot + 2 * BitsInMask(twos) + BitsInMask(ones);
|
||||
|
||||
for (; i < WordCount; i++) // Simply add in the last
|
||||
tot = tot + BitsInMask(data[i]); // 0 to 3 elements.
|
||||
|
||||
return tot;
|
||||
#undef HarleySealCSAStep
|
||||
}
|
||||
|
||||
// Checks if one integers is a multiple of another.
|
||||
template<typename T>
|
||||
constexpr inline bool AreIntegersMultiple(T a, T b)
|
||||
{
|
||||
static_assert(std::is_integral<T>::value, "AreIntegersMultiple requires integral types.");
|
||||
return a != 0 && b != 0 && // if at least one integer is 0, consider false (avoid div by 0 of the following modulo)
|
||||
((a % b) == 0 || (b % a) == 0);
|
||||
}
|
||||
|
||||
// Checks if value is a power-of-two.
|
||||
template<typename T>
|
||||
constexpr inline bool IsPowerOfTwo(T value)
|
||||
{
|
||||
static_assert(std::is_integral<T>::value, "IsPowerOfTwo works only with an integral type.");
|
||||
using T_unsigned = typename std::make_unsigned<T>::type;
|
||||
return (static_cast<T_unsigned>(value) & (static_cast<T_unsigned>(value) - 1)) == 0;
|
||||
}
|
||||
|
||||
// Returns the next power-of-two of a 32bit number or the current value if it is a power two.
|
||||
constexpr inline uint32_t CeilPowerOfTwo(uint32_t value)
|
||||
{
|
||||
return detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(value - 1, 16),
|
||||
8),
|
||||
4),
|
||||
2),
|
||||
1) + 1;
|
||||
}
|
||||
|
||||
// Returns the next power-of-two of a 32bit number of size_t value, or the current value if it is a power two.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 4, bool>::type = 0>
|
||||
constexpr inline uint32_t CeilPowerOfTwo(T value) { return CeilPowerOfTwo(static_cast<uint32_t>(value)); }
|
||||
|
||||
// Returns the next power-of-two of a 64bit number or the current value if it is a power two.
|
||||
constexpr inline uint64_t CeilPowerOfTwo(uint64_t value)
|
||||
{
|
||||
return detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(
|
||||
detail::LogicalOrRShiftOp(value - 1, 32),
|
||||
16),
|
||||
8),
|
||||
4),
|
||||
2),
|
||||
1) + 1;
|
||||
}
|
||||
|
||||
// Returns the next power-of-two of a 64bit number of size_t value, or the current value if it is a power two.
|
||||
template<typename T, typename std::enable_if<std::is_same<size_t, T>::value && sizeof(T) == 8, bool>::type = 0>
|
||||
constexpr inline uint64_t CeilPowerOfTwo(T value) { return CeilPowerOfTwo(static_cast<uint64_t>(value)); }
|
||||
|
||||
// Returns the closest power-of-two of a 32bit number.
|
||||
template<typename T>
|
||||
constexpr inline T RoundPowerOfTwo(T value)
|
||||
{
|
||||
static_assert(std::is_unsigned<T>::value, "RoundPowerOfTwo works only with an unsigned integral type.");
|
||||
return (value - (CeilPowerOfTwo(value) >> 1) < CeilPowerOfTwo(value) - value) ? CeilPowerOfTwo(value) >> 1 : CeilPowerOfTwo(value);
|
||||
}
|
||||
|
||||
// Returns the next value aligned to `alignment`, or the current value if it is already aligned.
|
||||
// `alignment` is required to be a power of two value or the result is undefined. Zero `alignment` returns zero.
|
||||
template<typename T>
|
||||
constexpr inline T CeilAligned(T value, uint64_t alignment)
|
||||
{
|
||||
static_assert(std::is_integral<T>::value, "CeilAligned works only with an integral type.");
|
||||
return static_cast<T>((static_cast<typename std::make_unsigned<T>::type>(value) + alignment - 1) & ~(alignment - 1));
|
||||
}
|
||||
|
||||
// Returns true if addition of two given operands leads to an integer overflow.
|
||||
template<typename T>
|
||||
constexpr inline bool DoesAdditionOverflow(T a, T b)
|
||||
{
|
||||
static_assert(std::is_unsigned<T>::value, "Overflow checks apply only work on unsigned integral types.");
|
||||
return std::numeric_limits<T>::max() - a < b;
|
||||
}
|
||||
|
||||
// Returns true if multiplication of two given operands leads to an integer overflow.
|
||||
template<typename T>
|
||||
constexpr inline bool DoesMultiplicationOverflow(T a, T b)
|
||||
{
|
||||
static_assert(std::is_unsigned<T>::value, "Overflow checks apply only work on unsigned integral types.");
|
||||
return b != 0 && std::numeric_limits<T>::max() / b < a;
|
||||
}
|
||||
|
||||
// Clamp
|
||||
//
|
||||
// This function can be used with different types - `value` vs. `lo`, `hi`.
|
||||
// If `lo` if larger than `hi` this function has undefined bahavior.
|
||||
//
|
||||
// Return: clamped `value` of the same type as `lo`, `hi`.
|
||||
//
|
||||
COMPILER_WARNINGS_PUSH
|
||||
#if COMPILER_MSVC
|
||||
COMPILER_WARNINGS_DISABLE(4756)
|
||||
#endif
|
||||
template<typename RT, typename VT, typename std::enable_if<
|
||||
baselib::is_of_same_signedness<RT, VT>::value
|
||||
|| !std::is_integral<RT>::value
|
||||
|| !std::is_integral<VT>::value
|
||||
, bool>::type = 0>
|
||||
inline RT Clamp(VT value, RT lo, RT hi)
|
||||
{
|
||||
if (value < lo) return lo;
|
||||
if (value > hi) return hi;
|
||||
return static_cast<RT>(value);
|
||||
}
|
||||
|
||||
COMPILER_WARNINGS_POP
|
||||
|
||||
template<typename RT, typename VT, typename std::enable_if<
|
||||
std::is_integral<RT>::value && std::is_unsigned<RT>::value &&
|
||||
std::is_integral<VT>::value && std::is_signed<VT>::value
|
||||
, bool>::type = 0>
|
||||
inline RT Clamp(VT value, RT lo, RT hi)
|
||||
{
|
||||
if (value < 0)
|
||||
return lo;
|
||||
using UnsignedVT = typename std::make_unsigned<VT>::type;
|
||||
return Clamp(static_cast<UnsignedVT>(value), lo, hi);
|
||||
}
|
||||
|
||||
template<typename RT, typename VT, typename std::enable_if<
|
||||
std::is_integral<RT>::value && std::is_signed<RT>::value &&
|
||||
std::is_integral<VT>::value && std::is_unsigned<VT>::value
|
||||
, bool>::type = 0>
|
||||
inline RT Clamp(VT value, RT lo, RT hi)
|
||||
{
|
||||
if (hi < 0)
|
||||
return hi;
|
||||
if (lo < 0)
|
||||
lo = 0;
|
||||
using UnsignedRT = typename std::make_unsigned<RT>::type;
|
||||
return static_cast<RT>(Clamp(value, static_cast<UnsignedRT>(lo), static_cast<UnsignedRT>(hi)));
|
||||
}
|
||||
|
||||
// Clamp `value` by lowest and highest value of RT.
|
||||
//
|
||||
// Return: clamped `value` of the type RT.
|
||||
//
|
||||
template<typename RT, typename VT, typename std::enable_if<
|
||||
!(std::numeric_limits<RT>::has_infinity && std::numeric_limits<VT>::has_infinity)
|
||||
, bool>::type = 0>
|
||||
inline RT ClampToType(VT value)
|
||||
{
|
||||
return Clamp(value, std::numeric_limits<RT>::lowest(), std::numeric_limits<RT>::max());
|
||||
}
|
||||
|
||||
// Clamp `value` by lowest and highest value of RT.
|
||||
//
|
||||
// This function is guaranteed to only return infinity values if the source value was already an infinity number.
|
||||
//
|
||||
// Return: clamped `value` of the type RT.
|
||||
//
|
||||
template<typename RT, typename VT, typename std::enable_if<
|
||||
(std::numeric_limits<RT>::has_infinity && std::numeric_limits<VT>::has_infinity)
|
||||
, bool>::type = 0>
|
||||
inline RT ClampToType(VT value)
|
||||
{
|
||||
if (value == std::numeric_limits<VT>::infinity() || value == -std::numeric_limits<VT>::infinity())
|
||||
return static_cast<RT>(value);
|
||||
return Clamp(value, std::numeric_limits<RT>::lowest(), std::numeric_limits<RT>::max());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if COMPILER_MSVC
|
||||
#include "Internal/Compiler/Msvc/AlgorithmMsvc.inl.h"
|
||||
#elif COMPILER_GCC || COMPILER_CLANG
|
||||
#include "Internal/Compiler/ClangOrGcc/AlgorithmClangOrGcc.inl.h"
|
||||
#else
|
||||
#error "Unknown Compiler"
|
||||
#endif
|
||||
449
Libraries/external/baselib/Include/Cpp/Atomic.h
vendored
Normal file
449
Libraries/external/baselib/Include/Cpp/Atomic.h
vendored
Normal file
@@ -0,0 +1,449 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_Atomic.h"
|
||||
#include "Internal/TypeTraits.h"
|
||||
|
||||
// Note that aligning by type is not possible with the C compatible COMPILER_ALIGN_AS as MSVC's own alignment attribute does not allow evaluation of sizeof
|
||||
#define ALIGN_ATOMIC(TYPE_) alignas(sizeof(TYPE_))
|
||||
#define ALIGNED_ATOMIC(TYPE_) ALIGN_ATOMIC(TYPE_) TYPE_
|
||||
|
||||
// Atomic interface that sticks closely to std::atomic
|
||||
// Major differences:
|
||||
// * free functions that operate on types other than baselib::atomic
|
||||
// * baselib::atomic allows access to its internal value
|
||||
// * no zero initialization on baselib::atomic
|
||||
// * no single parameter versions of compare_exchange
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
enum memory_order_relaxed_t { memory_order_relaxed = 0 }; // Equal to std::memory_order_relaxed
|
||||
enum memory_order_acquire_t { memory_order_acquire = 2 }; // Equal to std::memory_order_acquire
|
||||
enum memory_order_release_t { memory_order_release = 3 }; // Equal to std::memory_order_release
|
||||
enum memory_order_acq_rel_t { memory_order_acq_rel = 4 }; // Equal to std::memory_order_acq_rel
|
||||
enum memory_order_seq_cst_t { memory_order_seq_cst = 5 }; // Equal to std::memory_order_seq_cst
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template<typename T, typename ... Rest>
|
||||
struct is_any : std::false_type {};
|
||||
|
||||
template<typename T, typename First>
|
||||
struct is_any<T, First> : std::is_same<T, First> {};
|
||||
|
||||
template<typename T, typename First, typename ... Rest>
|
||||
struct is_any<T, First, Rest...>
|
||||
: std::integral_constant<bool, std::is_same<T, First>::value || is_any<T, Rest...>::value>
|
||||
{};
|
||||
|
||||
#define TEST_ATOMICS_PREREQUISITES(_TYPE) \
|
||||
static_assert(baselib::is_trivially_copyable<_TYPE>::value, "atomic operation operands needs to be trivially copyable"); \
|
||||
static_assert(sizeof(_TYPE) <= sizeof(void*) * 2, "atomic operation operands need to be smaller or equal than two pointers");
|
||||
|
||||
template<typename T> static inline T fail();
|
||||
|
||||
template<typename T, typename MemoryOrder, typename ... AllowedMemoryOrders> static inline T fail_prerequisites()
|
||||
{
|
||||
TEST_ATOMICS_PREREQUISITES(T);
|
||||
static_assert(is_any<MemoryOrder, AllowedMemoryOrders...>::value, "the specified memory ordering is invalid for this atomic operation");
|
||||
return fail<T>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrderSuccess, typename MemoryOrderFailure> static inline T fail_prerequisites_cmpxchg()
|
||||
{
|
||||
TEST_ATOMICS_PREREQUISITES(T);
|
||||
static_assert(
|
||||
// fail: relaxed, success: relaxed/acquire/release/seq_cst
|
||||
(std::is_same<MemoryOrderFailure, baselib::memory_order_relaxed_t>::value &&
|
||||
is_any<MemoryOrderSuccess, baselib::memory_order_relaxed_t, baselib::memory_order_acquire_t, baselib::memory_order_release_t, baselib::memory_order_seq_cst_t>::value) ||
|
||||
// fail: acquire, success acquire/release/seq_cst
|
||||
(std::is_same<MemoryOrderFailure, baselib::memory_order_relaxed_t>::value &&
|
||||
is_any<MemoryOrderSuccess, baselib::memory_order_acquire_t, baselib::memory_order_release_t, baselib::memory_order_seq_cst_t>::value) ||
|
||||
// fail: seq_cst, success: seq_cst
|
||||
(std::is_same<MemoryOrderSuccess, baselib::memory_order_seq_cst_t>::value && std::is_same<MemoryOrderFailure, baselib::memory_order_seq_cst_t>::value),
|
||||
"the specified combination of memory ordering is invalid for compare exchange operations");
|
||||
return fail<T>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder> static inline T fail_prerequisites_alu()
|
||||
{
|
||||
static_assert(std::is_integral<T>::value, "operands of arithmetic atomic operations need to be integral");
|
||||
return fail_prerequisites<T, MemoryOrder,
|
||||
baselib::memory_order_relaxed_t,
|
||||
baselib::memory_order_acquire_t,
|
||||
baselib::memory_order_release_t,
|
||||
baselib::memory_order_acq_rel_t,
|
||||
baselib::memory_order_seq_cst_t>();
|
||||
}
|
||||
}
|
||||
|
||||
// MACRO generated impl
|
||||
// re-directs to Baselib_atomic_ API
|
||||
// ----------------------------------------------------------------------------------------------------------------------------------
|
||||
#define detail_THREAD_FENCE(order, ...) \
|
||||
static FORCE_INLINE void atomic_thread_fence(memory_order_##order##_t order) \
|
||||
{ \
|
||||
return Baselib_atomic_thread_fence_##order(); \
|
||||
}
|
||||
|
||||
#define detail_LOAD(op, order, id, bits, ...) \
|
||||
template<typename T, typename std::enable_if<baselib::is_trivial_of_size<T, bits/8>::value, int>::type = 0> \
|
||||
static FORCE_INLINE T atomic_load_explicit(const T& obj, memory_order_##order##_t order) \
|
||||
{ \
|
||||
T ret; \
|
||||
Baselib_atomic_load_##id##_##order##_v(&obj, &ret); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define detail_LOAD128(op, order, id, bits, ...) \
|
||||
template<typename T, typename std::enable_if<baselib::is_trivial_of_size<T, bits/8>::value, int>::type = 0> \
|
||||
static FORCE_INLINE T atomic_load_explicit(const T& obj, memory_order_##order##_t order) \
|
||||
{ \
|
||||
T ret; \
|
||||
Baselib_atomic_load_##id##_##order##_v(const_cast<T*>(&obj), &ret); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define detail_STORE(op, order, id, bits, ...) \
|
||||
template<typename T, typename std::enable_if<baselib::is_trivial_of_size<T, bits/8>::value, int>::type = 0> \
|
||||
static FORCE_INLINE void atomic_store_explicit(T& obj, typename std::common_type<T>::type value, memory_order_##order##_t order)\
|
||||
{ \
|
||||
return Baselib_atomic_store_##id##_##order##_v(&obj, &value); \
|
||||
}
|
||||
|
||||
#define detail_LOAD_STORE(op, order, id, bits, ...) \
|
||||
template<typename T, typename std::enable_if<baselib::is_trivial_of_size<T, bits/8>::value, int>::type = 0> \
|
||||
static FORCE_INLINE T atomic_##op##_explicit(T& obj, typename std::common_type<T>::type value, memory_order_##order##_t order) \
|
||||
{ \
|
||||
T ret; \
|
||||
Baselib_atomic_##op##_##id##_##order##_v(&obj, &value, &ret); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define detail_ALU(op, order, id, bits, ...) \
|
||||
template<typename T, typename std::enable_if<baselib::is_integral_of_size<T, bits/8>::value, int>::type = 0> \
|
||||
static FORCE_INLINE T atomic_##op##_explicit(T& obj, typename std::common_type<T>::type value, memory_order_##order##_t order) \
|
||||
{ \
|
||||
T ret; \
|
||||
Baselib_atomic_##op##_##id##_##order##_v(&obj, &value, &ret); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define detail_CMP_XCHG(op, order1, order2, id, bits, ...) \
|
||||
template<typename T, typename std::enable_if<baselib::is_trivial_of_size<T, bits/8>::value, int>::type = 0> \
|
||||
static FORCE_INLINE bool atomic_##op##_explicit(T& obj, \
|
||||
typename std::common_type<T>::type& expected, \
|
||||
typename std::common_type<T>::type desired, \
|
||||
memory_order_##order1##_t order_success, \
|
||||
memory_order_##order2##_t order_failure) \
|
||||
{ \
|
||||
return Baselib_atomic_##op##_##id##_##order1##_##order2##_v(&obj, &expected, &desired); \
|
||||
}
|
||||
|
||||
#define detail_NOT_SUPPORTED(...)
|
||||
|
||||
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
|
||||
detail_THREAD_FENCE
|
||||
)
|
||||
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_INT_TYPE(
|
||||
detail_LOAD, // load
|
||||
detail_STORE, // store
|
||||
detail_ALU, // add
|
||||
detail_ALU, // and
|
||||
detail_ALU, // or
|
||||
detail_ALU, // xor
|
||||
detail_LOAD_STORE, // exchange
|
||||
detail_CMP_XCHG, // compare_exchange_weak
|
||||
detail_CMP_XCHG // compare_exchange_strong
|
||||
)
|
||||
|
||||
#if PLATFORM_ARCH_64
|
||||
// 128bit atomics
|
||||
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
|
||||
detail_LOAD128, // load
|
||||
detail_STORE, // store
|
||||
detail_NOT_SUPPORTED, // add
|
||||
detail_NOT_SUPPORTED, // and
|
||||
detail_NOT_SUPPORTED, // or
|
||||
detail_NOT_SUPPORTED, // xor
|
||||
detail_LOAD_STORE, // exchange
|
||||
detail_CMP_XCHG, // compare_exchange_weak
|
||||
detail_CMP_XCHG, // compare_exchange_strong
|
||||
128, 128)
|
||||
#endif
|
||||
|
||||
#undef detail_THREAD_FENCE
|
||||
#undef detail_LOAD128
|
||||
#undef detail_LOAD
|
||||
#undef detail_STORE
|
||||
#undef detail_LOAD_STORE
|
||||
#undef detail_ALU
|
||||
#undef detail_CMP_XCHG
|
||||
#undef detail_NOT_SUPPORTED
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_fetch_sub_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
return atomic_fetch_add_explicit(obj, 0 - value, order);
|
||||
}
|
||||
|
||||
// API documentation and default fallback for non-matching types
|
||||
// ----------------------------------------------------------------------------------------------------------------------
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_load_explicit(const T& obj, MemoryOrder order)
|
||||
{
|
||||
return detail::fail_prerequisites<T, MemoryOrder, baselib::memory_order_relaxed_t, baselib::memory_order_acquire_t, baselib::memory_order_seq_cst_t>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE void atomic_store_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
detail::fail_prerequisites<T, MemoryOrder, baselib::memory_order_relaxed_t, baselib::memory_order_release_t, baselib::memory_order_seq_cst_t>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_fetch_add_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
return detail::fail_prerequisites_alu<T, MemoryOrder>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_fetch_and_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
return detail::fail_prerequisites_alu<T, MemoryOrder>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_fetch_or_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
return detail::fail_prerequisites_alu<T, MemoryOrder>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_fetch_xor_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
return detail::fail_prerequisites_alu<T, MemoryOrder>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrder>
|
||||
static FORCE_INLINE T atomic_exchange_explicit(T& obj, typename std::common_type<T>::type value, MemoryOrder order)
|
||||
{
|
||||
return detail::fail_prerequisites<T, MemoryOrder>();
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrderSuccess, typename MemoryOrderFailure>
|
||||
static FORCE_INLINE bool atomic_compare_exchange_weak_explicit(T& obj,
|
||||
typename std::common_type<T>::type& expected,
|
||||
typename std::common_type<T>::type desired,
|
||||
MemoryOrderSuccess order_success,
|
||||
MemoryOrderFailure order_failure)
|
||||
{
|
||||
detail::fail_prerequisites_cmpxchg<T, MemoryOrderSuccess, MemoryOrderFailure>();
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename T, typename MemoryOrderSuccess, typename MemoryOrderFailure>
|
||||
static FORCE_INLINE bool atomic_compare_exchange_strong_explicit(T& obj,
|
||||
typename std::common_type<T>::type& expected,
|
||||
typename std::common_type<T>::type desired,
|
||||
MemoryOrderSuccess order_success,
|
||||
MemoryOrderFailure order_failure)
|
||||
{
|
||||
detail::fail_prerequisites_cmpxchg<T, MemoryOrderSuccess, MemoryOrderFailure>();
|
||||
return false;
|
||||
}
|
||||
|
||||
// default memory order (memory_order_seq_cst)
|
||||
// ----------------------------------------------------------------------------------------------------------------------
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_load(const T& obj)
|
||||
{
|
||||
return atomic_load_explicit(obj, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE void atomic_store(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_store_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_fetch_add(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_fetch_add_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_fetch_sub(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_fetch_sub_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_fetch_and(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_fetch_and_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_fetch_or(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_fetch_or_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_fetch_xor(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_fetch_xor_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE T atomic_exchange(T& obj, typename std::common_type<T>::type value)
|
||||
{
|
||||
return atomic_exchange_explicit(obj, value, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE bool atomic_compare_exchange_weak(T& obj,
|
||||
typename std::common_type<T>::type& expected,
|
||||
typename std::common_type<T>::type desired)
|
||||
{
|
||||
return atomic_compare_exchange_weak_explicit(obj, expected, desired, memory_order_seq_cst, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static FORCE_INLINE bool atomic_compare_exchange_strong(T& obj,
|
||||
typename std::common_type<T>::type& expected,
|
||||
typename std::common_type<T>::type desired)
|
||||
{
|
||||
return atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_seq_cst, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
struct atomic_common
|
||||
{
|
||||
using value_type = T;
|
||||
|
||||
TEST_ATOMICS_PREREQUISITES(T);
|
||||
|
||||
ALIGNED_ATOMIC(T) obj;
|
||||
|
||||
FORCE_INLINE atomic_common() = default;
|
||||
|
||||
// Initializes atomic with a given value. Initialization is not atomic!
|
||||
FORCE_INLINE atomic_common(T value)
|
||||
{
|
||||
obj = value;
|
||||
}
|
||||
|
||||
FORCE_INLINE operator T() const { return atomic_load_explicit(obj, memory_order_seq_cst); }
|
||||
FORCE_INLINE T operator=(T value) { atomic_store_explicit(obj, value, memory_order_seq_cst); return value; }
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T load(TMemoryOrder order = memory_order_seq_cst) const
|
||||
{
|
||||
return atomic_load_explicit(obj, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE void store(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_store_explicit(obj, value, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T exchange(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_exchange_explicit(obj, value, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrderSuccess, typename TMemoryOrderFailure>
|
||||
FORCE_INLINE bool compare_exchange_weak(T& expected, T desired, TMemoryOrderSuccess order_success, TMemoryOrderFailure order_failure)
|
||||
{
|
||||
return atomic_compare_exchange_weak_explicit(obj, expected, desired, order_success, order_failure);
|
||||
}
|
||||
|
||||
FORCE_INLINE bool compare_exchange_weak(T& expected, T desired)
|
||||
{
|
||||
return atomic_compare_exchange_weak_explicit(obj, expected, desired, memory_order_seq_cst, memory_order_seq_cst);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrderSuccess, typename TMemoryOrderFailure>
|
||||
FORCE_INLINE bool compare_exchange_strong(T& expected, T desired, TMemoryOrderSuccess order_success, TMemoryOrderFailure order_failure)
|
||||
{
|
||||
return atomic_compare_exchange_strong_explicit(obj, expected, desired, order_success, order_failure);
|
||||
}
|
||||
|
||||
FORCE_INLINE bool compare_exchange_strong(T& expected, T desired)
|
||||
{
|
||||
return atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_seq_cst, memory_order_seq_cst);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, bool IsIntegral>
|
||||
struct atomic_base {};
|
||||
|
||||
// Atomic type for integral types.
|
||||
template<typename T>
|
||||
struct atomic_base<T, true> : atomic_common<T>
|
||||
{
|
||||
using atomic_common<T>::atomic_common;
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T fetch_add(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_fetch_add_explicit(atomic_common<T>::obj, value, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T fetch_sub(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_fetch_sub_explicit(atomic_common<T>::obj, value, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T fetch_and(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_fetch_and_explicit(atomic_common<T>::obj, value, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T fetch_or(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_fetch_or_explicit(atomic_common<T>::obj, value, order);
|
||||
}
|
||||
|
||||
template<typename TMemoryOrder = memory_order_seq_cst_t>
|
||||
FORCE_INLINE T fetch_xor(T value, TMemoryOrder order = memory_order_seq_cst)
|
||||
{
|
||||
return atomic_fetch_xor_explicit(atomic_common<T>::obj, value, order);
|
||||
}
|
||||
|
||||
FORCE_INLINE T operator++(int) { return atomic_fetch_add_explicit(atomic_common<T>::obj, T(1), memory_order_seq_cst); }
|
||||
FORCE_INLINE T operator--(int) { return atomic_fetch_sub_explicit(atomic_common<T>::obj, T(1), memory_order_seq_cst); }
|
||||
FORCE_INLINE T operator++() { return atomic_fetch_add_explicit(atomic_common<T>::obj, T(1), memory_order_seq_cst) + T(1); }
|
||||
FORCE_INLINE T operator--() { return atomic_fetch_sub_explicit(atomic_common<T>::obj, T(1), memory_order_seq_cst) - T(1); }
|
||||
FORCE_INLINE T operator+=(T value) { return atomic_fetch_add_explicit(atomic_common<T>::obj, value, memory_order_seq_cst) + value; }
|
||||
FORCE_INLINE T operator-=(T value) { return atomic_fetch_sub_explicit(atomic_common<T>::obj, value, memory_order_seq_cst) - value; }
|
||||
FORCE_INLINE T operator&=(T value) { return atomic_fetch_and_explicit(atomic_common<T>::obj, value, memory_order_seq_cst) & value; }
|
||||
FORCE_INLINE T operator|=(T value) { return atomic_fetch_or_explicit(atomic_common<T>::obj, value, memory_order_seq_cst) | value; }
|
||||
FORCE_INLINE T operator^=(T value) { return atomic_fetch_xor_explicit(atomic_common<T>::obj, value, memory_order_seq_cst) ^ value; }
|
||||
};
|
||||
|
||||
// Atomic type for non-integral types.
|
||||
template<typename T>
|
||||
struct atomic_base<T, false> : atomic_common<T>
|
||||
{
|
||||
using atomic_common<T>::atomic_common;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct atomic : atomic_base<T, std::is_integral<T>::value>
|
||||
{
|
||||
using atomic_base<T, std::is_integral<T>::value>::atomic_base;
|
||||
};
|
||||
|
||||
#undef TEST_ATOMICS_PREREQUISITES
|
||||
}
|
||||
}
|
||||
98
Libraries/external/baselib/Include/Cpp/Barrier.h
vendored
Normal file
98
Libraries/external/baselib/Include/Cpp/Barrier.h
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
#pragma once
|
||||
|
||||
#include "Atomic.h"
|
||||
#include "Semaphore.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In parallel computing, a barrier is a type of synchronization
|
||||
// method. A barrier for a group of threads or processes in the source
|
||||
// code means any thread/process must stop at this point and cannot
|
||||
// proceed until all other threads/processes reach this barrier.
|
||||
//
|
||||
// "Barrier (computer science)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/wiki/Barrier_(computer_science)
|
||||
//
|
||||
// For optimal performance, baselib::Barrier should be stored at a
|
||||
// cache aligned memory location.
|
||||
class Barrier
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
Barrier(const Barrier& other) = delete;
|
||||
Barrier& operator=(const Barrier& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
Barrier(Barrier&& other) = delete;
|
||||
Barrier& operator=(Barrier&& other) = delete;
|
||||
|
||||
// Creates a barrier with a set number of threads to synchronize.
|
||||
// Once a set of threads enter a Barrier, the *same* set of threads
|
||||
// must continue to use the Barrier - i.e. no additional threads may
|
||||
// enter any of the Acquires. For example, it is *not* allowed to
|
||||
// create a Barrier with threads_num=10, then send 30 threads into
|
||||
// barrier.Acquire() with the expectation 3 batches of 10 will be
|
||||
// released. However, once it is guaranteed that all threads have
|
||||
// exited all of the Acquire invocations, it is okay to reuse the
|
||||
// same barrier object with a different set of threads - for
|
||||
// example, after Join() has been called on all participating
|
||||
// threads and a new batch of threads is launched.
|
||||
//
|
||||
// \param threads_num Wait for this number of threads before letting all proceed.
|
||||
explicit Barrier(uint16_t threads_num)
|
||||
: m_threshold(threads_num), m_count(0)
|
||||
{
|
||||
}
|
||||
|
||||
// Block the current thread until the specified number of threads
|
||||
// also reach this `Acquire()`.
|
||||
void Acquire()
|
||||
{
|
||||
// If there is two Barrier::Acquire calls in a row, when the
|
||||
// first Acquire releases, one thread may jump out of the gate
|
||||
// so fast that it reaches the next Acquire and steals *another*
|
||||
// semaphore slot, continuing past the *second* Acquire, before
|
||||
// all threads have even left the first Acquire. So, we instead
|
||||
// construct two semaphores and alternate between them to
|
||||
// prevent this.
|
||||
|
||||
uint16_t previous_value = m_count.fetch_add(1, memory_order_relaxed);
|
||||
BaselibAssert(previous_value < m_threshold * 2);
|
||||
|
||||
// If count is in range [0, m_threshold), use semaphore A.
|
||||
// If count is in range [m_threshold, m_threshold * 2), use semaphore B.
|
||||
bool useSemaphoreB = previous_value >= m_threshold;
|
||||
Semaphore& semaphore = useSemaphoreB ? m_semaphoreB : m_semaphoreA;
|
||||
|
||||
// If (count % m_threshold) == (m_threshold - 1), then we're the last thread in the group, release the semaphore.
|
||||
bool do_release = previous_value % m_threshold == m_threshold - 1;
|
||||
|
||||
if (do_release)
|
||||
{
|
||||
if (previous_value == m_threshold * 2 - 1)
|
||||
{
|
||||
// Note this needs to happen before the Release to avoid
|
||||
// a race condition (if this thread yields right before
|
||||
// the Release, but after the add, the invariant of
|
||||
// previous_value < m_threshold * 2 may break for
|
||||
// another thread)
|
||||
m_count.fetch_sub(m_threshold * 2, memory_order_relaxed);
|
||||
}
|
||||
semaphore.Release(m_threshold - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
semaphore.Acquire();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Semaphore m_semaphoreA;
|
||||
Semaphore m_semaphoreB;
|
||||
uint16_t m_threshold;
|
||||
atomic<uint16_t> m_count;
|
||||
};
|
||||
}
|
||||
}
|
||||
31
Libraries/external/baselib/Include/Cpp/Baselib_DynamicLibrary.h
vendored
Normal file
31
Libraries/external/baselib/Include/Cpp/Baselib_DynamicLibrary.h
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_DynamicLibrary.h"
|
||||
|
||||
// alias for Baselib_DynamicLibrary_OpenUtf8
|
||||
static inline Baselib_DynamicLibrary_Handle Baselib_DynamicLibrary_Open(
|
||||
const char* pathnameUtf8,
|
||||
Baselib_ErrorState* errorState
|
||||
)
|
||||
{
|
||||
return Baselib_DynamicLibrary_OpenUtf8(pathnameUtf8, errorState);
|
||||
}
|
||||
|
||||
// alias for Baselib_DynamicLibrary_OpenUtf16
|
||||
static inline Baselib_DynamicLibrary_Handle Baselib_DynamicLibrary_Open(
|
||||
const baselib_char16_t* pathnameUtf16,
|
||||
Baselib_ErrorState* errorState
|
||||
)
|
||||
{
|
||||
return Baselib_DynamicLibrary_OpenUtf16(pathnameUtf16, errorState);
|
||||
}
|
||||
|
||||
static inline bool operator==(const Baselib_DynamicLibrary_Handle& a, const Baselib_DynamicLibrary_Handle& b)
|
||||
{
|
||||
return a.handle == b.handle;
|
||||
}
|
||||
|
||||
static inline bool operator!=(const Baselib_DynamicLibrary_Handle& a, const Baselib_DynamicLibrary_Handle& b)
|
||||
{
|
||||
return a.handle != b.handle;
|
||||
}
|
||||
50
Libraries/external/baselib/Include/Cpp/BinarySemaphore.h
vendored
Normal file
50
Libraries/external/baselib/Include/Cpp/BinarySemaphore.h
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include "CappedSemaphore.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, a semaphore is a variable or abstract data type used to control access to a common resource by multiple processes in a concurrent
|
||||
// system such as a multitasking operating system. A semaphore is simply a variable. This variable is used to solve critical section problems and to achieve
|
||||
// process synchronization in the multi processing environment. A trivial semaphore is a plain variable that is changed (for example, incremented or
|
||||
// decremented, or toggled) depending on programmer-defined conditions.
|
||||
//
|
||||
// A useful way to think of a semaphore as used in the real-world system is as a record of how many units of a particular resource are available, coupled with
|
||||
// operations to adjust that record safely (i.e. to avoid race conditions) as units are required or become free, and, if necessary, wait until a unit of the
|
||||
// resource becomes available.
|
||||
//
|
||||
// "Semaphore (programming)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Semaphore_(programming)&oldid=872408126
|
||||
//
|
||||
// For optimal performance, baselib::BinarySemaphore should be stored at a cache aligned memory location.
|
||||
class BinarySemaphore : private CappedSemaphore
|
||||
{
|
||||
public:
|
||||
|
||||
// Creates a binary semaphore synchronization primitive.
|
||||
// Binary means the semaphore can at any given time have at most one token available for consummation.
|
||||
//
|
||||
// This is just an API facade for CappedSemaphore(1)
|
||||
//
|
||||
// If there are not enough system resources to create a semaphore, process abort is triggered.
|
||||
BinarySemaphore() : CappedSemaphore(1) {}
|
||||
|
||||
using CappedSemaphore::Acquire;
|
||||
using CappedSemaphore::TryAcquire;
|
||||
using CappedSemaphore::TryTimedAcquire;
|
||||
|
||||
// Submit token to the semaphore.
|
||||
// If threads are waiting the token is consumed before this function return.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// \returns true if a token was submitted, false otherwise (meaning the BinarySemaphore already has a token)
|
||||
inline bool Release()
|
||||
{
|
||||
return CappedSemaphore::Release(1) == 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
112
Libraries/external/baselib/Include/Cpp/CappedSemaphore.h
vendored
Normal file
112
Libraries/external/baselib/Include/Cpp/CappedSemaphore.h
vendored
Normal file
@@ -0,0 +1,112 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_CappedSemaphore.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, a semaphore is a variable or abstract data type used to control access to a common resource by multiple processes in a concurrent
|
||||
// system such as a multitasking operating system. A semaphore is simply a variable. This variable is used to solve critical section problems and to achieve
|
||||
// process synchronization in the multi processing environment. A trivial semaphore is a plain variable that is changed (for example, incremented or
|
||||
// decremented, or toggled) depending on programmer-defined conditions.
|
||||
//
|
||||
// A useful way to think of a semaphore as used in the real-world system is as a record of how many units of a particular resource are available, coupled with
|
||||
// operations to adjust that record safely (i.e. to avoid race conditions) as units are required or become free, and, if necessary, wait until a unit of the
|
||||
// resource becomes available.
|
||||
//
|
||||
// "Semaphore (programming)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Semaphore_(programming)&oldid=872408126
|
||||
//
|
||||
// For optimal performance, baselib::CappedSemaphore should be stored at a cache aligned memory location.
|
||||
class CappedSemaphore
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
CappedSemaphore(const CappedSemaphore& other) = delete;
|
||||
CappedSemaphore& operator=(const CappedSemaphore& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
CappedSemaphore(CappedSemaphore&& other) = delete;
|
||||
CappedSemaphore& operator=(CappedSemaphore&& other) = delete;
|
||||
|
||||
// Creates a capped counting semaphore synchronization primitive.
|
||||
// Cap is the number of tokens that can be held by the semaphore when there is no contention.
|
||||
//
|
||||
// If there are not enough system resources to create a semaphore, process abort is triggered.
|
||||
CappedSemaphore(const uint16_t cap) : m_CappedSemaphoreData(Baselib_CappedSemaphore_Create(cap))
|
||||
{
|
||||
}
|
||||
|
||||
// Reclaim resources and memory held by the semaphore.
|
||||
//
|
||||
// If threads are waiting on the semaphore, destructor will trigger an assert and may cause process abort.
|
||||
~CappedSemaphore()
|
||||
{
|
||||
Baselib_CappedSemaphore_Free(&m_CappedSemaphoreData);
|
||||
}
|
||||
|
||||
// Wait for semaphore token to become available
|
||||
//
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Acquire()
|
||||
{
|
||||
return Baselib_CappedSemaphore_Acquire(&m_CappedSemaphoreData);
|
||||
}
|
||||
|
||||
// Try to consume a token and return immediately.
|
||||
//
|
||||
// When successful this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Return: true if token was consumed. false if not.
|
||||
inline bool TryAcquire()
|
||||
{
|
||||
return Baselib_CappedSemaphore_TryAcquire(&m_CappedSemaphoreData);
|
||||
}
|
||||
|
||||
// Wait for semaphore token to become available
|
||||
//
|
||||
// When successful this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TryAcquire with a zero timeout differs from TryAcquire() in that TryAcquire() is guaranteed to be a user space operation
|
||||
// while Acquire with a zero timeout may enter the kernel and cause a context switch.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Arguments:
|
||||
// - timeout: Time to wait for token to become available.
|
||||
//
|
||||
// Return: true if token was consumed. false if timeout was reached.
|
||||
inline bool TryTimedAcquire(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
return Baselib_CappedSemaphore_TryTimedAcquire(&m_CappedSemaphoreData, timeoutInMilliseconds.count());
|
||||
}
|
||||
|
||||
// Submit tokens to the semaphore.
|
||||
// If threads are waiting an equal amount of tokens are consumed before this function return.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// \returns number of submitted tokens.
|
||||
inline uint16_t Release(const uint16_t count)
|
||||
{
|
||||
return Baselib_CappedSemaphore_Release(&m_CappedSemaphoreData, count);
|
||||
}
|
||||
|
||||
// Sets the semaphore token count to zero and release all waiting threads.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// Return: number of released threads.
|
||||
inline uint32_t ResetAndReleaseWaitingThreads()
|
||||
{
|
||||
return Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(&m_CappedSemaphoreData);
|
||||
}
|
||||
|
||||
private:
|
||||
Baselib_CappedSemaphore m_CappedSemaphoreData;
|
||||
};
|
||||
}
|
||||
}
|
||||
96
Libraries/external/baselib/Include/Cpp/ConditionVariable.h
vendored
Normal file
96
Libraries/external/baselib/Include/Cpp/ConditionVariable.h
vendored
Normal file
@@ -0,0 +1,96 @@
|
||||
#pragma once
|
||||
|
||||
#include "Time.h"
|
||||
#include "Lock.h"
|
||||
#include <cstdint>
|
||||
|
||||
#if PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#include "Internal/ConditionVariableData_FutexBased.inl.h"
|
||||
#else
|
||||
#include "Internal/ConditionVariableData_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// Conceptually a condition variable is a queue of threads, associated with a monitor, on which a thread may wait for some condition to become true.
|
||||
//
|
||||
// Thus each condition variable c is associated with an assertion Pc. While a thread is waiting on a condition variable, that thread is not considered
|
||||
// to occupy the monitor, and so other threads may enter the monitor to change the monitor's state. In most types of monitors, these other threads may
|
||||
// signal the condition variable c to indicate that assertion Pc is true in the current state.
|
||||
//
|
||||
// "Monitor (synchronization)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Monitor_(synchronization)&oldid=914426020#Condition_variables_2
|
||||
//
|
||||
// For optimal performance, baselib::ConditionVariable should be stored at a cache aligned memory location.
|
||||
class ConditionVariable
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
ConditionVariable(const ConditionVariable& other) = delete;
|
||||
ConditionVariable& operator=(const ConditionVariable& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
ConditionVariable(ConditionVariable&& other) = delete;
|
||||
ConditionVariable& operator=(ConditionVariable&& other) = delete;
|
||||
|
||||
// Creates a condition variable synchronization primitive.
|
||||
ConditionVariable(Lock& lock) : m_Lock(lock)
|
||||
{}
|
||||
|
||||
// Reclaim resources and memory held by the condition variable.
|
||||
//
|
||||
// If threads are waiting on the condition variable, destructor will trigger an assert and may cause process abort.
|
||||
~ConditionVariable()
|
||||
{
|
||||
BaselibAssert(!m_Data.HasWaiters(), "Destruction is not allowed when there are still threads waiting on the condition variable.");
|
||||
NotifyAll();
|
||||
}
|
||||
|
||||
// Wait for the condition variable to become available.
|
||||
//
|
||||
// The lock must have been previously acquired.
|
||||
// For the duration of the wait the lock is released and then re-acquired upon exit.
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Wait();
|
||||
|
||||
// Wait for the condition variable to become available.
|
||||
//
|
||||
// The lock must have been previously acquired.
|
||||
// For the duration of the wait the lock is released and then re-acquired upon exit.
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TimedWait with a zero timeout is guaranteed to be a user space operation.
|
||||
//
|
||||
// \param timeoutInMilliseconds Time to wait for condition variable to become available.
|
||||
// \returns true if the condition variable is available, false if timeout was reached.
|
||||
inline bool TimedWait(const timeout_ms timeoutInMilliseconds);
|
||||
|
||||
// Wake up threads waiting for the condition variable.
|
||||
//
|
||||
// This function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// \param count At most, `count` waiting threads will be notified, but never more than there are currently waiting.
|
||||
inline void Notify(uint16_t count);
|
||||
|
||||
// Wake up all threads waiting for the condition variable.
|
||||
//
|
||||
// This function is guaranteed to emit a release barrier.
|
||||
inline void NotifyAll()
|
||||
{
|
||||
Notify(std::numeric_limits<uint16_t>::max());
|
||||
}
|
||||
|
||||
private:
|
||||
Lock& m_Lock;
|
||||
detail::ConditionVariableData m_Data;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#if PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#include "Internal/ConditionVariable_FutexBased.inl.h"
|
||||
#else
|
||||
#include "Internal/ConditionVariable_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
70
Libraries/external/baselib/Include/Cpp/CountdownTimer.h
vendored
Normal file
70
Libraries/external/baselib/Include/Cpp/CountdownTimer.h
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_CountdownTimer.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
class CountdownTimer
|
||||
{
|
||||
public:
|
||||
//
|
||||
// Create a countdown timer that already expired.
|
||||
//
|
||||
// Guaranteed to not sample the system timer.
|
||||
//
|
||||
static CountdownTimer InitializeExpired()
|
||||
{
|
||||
return CountdownTimer();
|
||||
}
|
||||
|
||||
//
|
||||
// Create and start a countdown timer.
|
||||
//
|
||||
static CountdownTimer StartNew(const high_precision_clock::duration timeout)
|
||||
{
|
||||
return CountdownTimer(timeout);
|
||||
}
|
||||
|
||||
//
|
||||
// Get time left before timeout expires.
|
||||
//
|
||||
// This function is guaranteed to return zero once timeout expired.
|
||||
// It is also guaranteed that this function will not return zero until timeout expires.
|
||||
// Return the time left as a high precision duration.
|
||||
//
|
||||
high_precision_clock::duration GetTimeLeft() const
|
||||
{
|
||||
return high_precision_clock::duration_from_ticks(Baselib_CountdownTimer_GetTimeLeftInTicks(m_CountdownTimer));
|
||||
}
|
||||
|
||||
//
|
||||
// Get time left before timeout expires.
|
||||
//
|
||||
// This function is guaranteed to return zero once timeout expired.
|
||||
// It is also guaranteed that this function will not return zero until timeout expires.
|
||||
// Return the time left as a millisecond integer duration.
|
||||
//
|
||||
timeout_ms GetTimeLeftInMilliseconds() const
|
||||
{
|
||||
return timeout_ms(Baselib_CountdownTimer_GetTimeLeftInMilliseconds(m_CountdownTimer));
|
||||
}
|
||||
|
||||
//
|
||||
// Check if timout has been reached.
|
||||
//
|
||||
bool TimeoutExpired() const
|
||||
{
|
||||
return Baselib_CountdownTimer_TimeoutExpired(m_CountdownTimer);
|
||||
}
|
||||
|
||||
private:
|
||||
CountdownTimer() : m_CountdownTimer{0, 0} {}
|
||||
CountdownTimer(const high_precision_clock::duration timeout) : m_CountdownTimer(Baselib_CountdownTimer_StartTicks(high_precision_clock::ticks_from_duration_roundup(timeout))) {}
|
||||
|
||||
Baselib_CountdownTimer m_CountdownTimer;
|
||||
};
|
||||
}
|
||||
}
|
||||
121
Libraries/external/baselib/Include/Cpp/EventSemaphore.h
vendored
Normal file
121
Libraries/external/baselib/Include/Cpp/EventSemaphore.h
vendored
Normal file
@@ -0,0 +1,121 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_EventSemaphore.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, an event (also called event semaphore) is a type of synchronization mechanism that is used to indicate to waiting processes when a
|
||||
// particular condition has become true.
|
||||
// An event is an abstract data type with a boolean state and the following operations:
|
||||
// * wait - when executed, causes the suspension of the executing process until the state of the event is set to true. If the state is already set to true has no effect.
|
||||
// * set - sets the event's state to true, release all waiting processes.
|
||||
// * clear - sets the event's state to false.
|
||||
//
|
||||
// "Event (synchronization primitive)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Event_(synchronization_primitive)&oldid=781517732
|
||||
//
|
||||
// For optimal performance, baselib::EventSemaphore should be stored at a cache aligned memory location.
|
||||
class EventSemaphore
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
EventSemaphore(const EventSemaphore& other) = delete;
|
||||
EventSemaphore& operator=(const EventSemaphore& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
EventSemaphore(EventSemaphore&& other) = delete;
|
||||
EventSemaphore& operator=(EventSemaphore&& other) = delete;
|
||||
|
||||
// Creates an event semaphore synchronization primitive. Initial state of event is unset.
|
||||
//
|
||||
// If there are not enough system resources to create a semaphore, process abort is triggered.
|
||||
EventSemaphore() : m_EventSemaphoreData(Baselib_EventSemaphore_Create())
|
||||
{
|
||||
}
|
||||
|
||||
// Reclaim resources and memory held by the semaphore.
|
||||
// If threads are waiting on the semaphore, calling free may trigger an assert and may cause process abort.
|
||||
~EventSemaphore()
|
||||
{
|
||||
Baselib_EventSemaphore_Free(&m_EventSemaphoreData);
|
||||
}
|
||||
|
||||
// Try to acquire semaphore.
|
||||
//
|
||||
// When semaphore is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// \returns true if event is set, false other wise.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
inline bool TryAcquire()
|
||||
{
|
||||
return Baselib_EventSemaphore_TryAcquire(&m_EventSemaphoreData);
|
||||
}
|
||||
|
||||
// Acquire semaphore.
|
||||
//
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Acquire()
|
||||
{
|
||||
return Baselib_EventSemaphore_Acquire(&m_EventSemaphoreData);
|
||||
}
|
||||
|
||||
// Try to acquire semaphore.
|
||||
//
|
||||
// If the event is set this function returns true, otherwise the thread will wait for the event to be set or for release to be called.
|
||||
//
|
||||
// When semaphore is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TryTimedAcquire with a zero timeout differs from TryAcquire in that TryAcquire is guaranteed to be a user space operation
// while TryTimedAcquire may enter the kernel and cause a context switch.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// \returns true if semaphore was acquired.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
inline bool TryTimedAcquire(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
return Baselib_EventSemaphore_TryTimedAcquire(&m_EventSemaphoreData, timeoutInMilliseconds.count());
|
||||
}
|
||||
|
||||
// Sets the event
|
||||
//
|
||||
// Setting the event will cause all waiting threads to wake up, and will let all future acquiring threads through until Reset is called.
|
||||
// It is guaranteed that any thread waiting previously on the EventSemaphore will be woken up, even if the semaphore is immediately reset. (no lock stealing)
|
||||
//
|
||||
// Guaranteed to emit a release barrier.
|
||||
inline void Set()
|
||||
{
|
||||
return Baselib_EventSemaphore_Set(&m_EventSemaphoreData);
|
||||
}
|
||||
|
||||
// Reset event
|
||||
//
|
||||
// Resetting the event will cause all future acquiring threads to enter a wait state.
|
||||
// Has no effect if the EventSemaphore is already in a reset state.
|
||||
//
|
||||
// Guaranteed to emit a release barrier.
|
||||
inline void Reset()
|
||||
{
|
||||
return Baselib_EventSemaphore_Reset(&m_EventSemaphoreData);
|
||||
}
|
||||
|
||||
// Reset event and release all waiting threads
|
||||
//
|
||||
// Resetting the event will cause all future acquiring threads to enter a wait state.
|
||||
// If there were any threads waiting (i.e. the EventSemaphore was already in a release state) they will be released.
|
||||
//
|
||||
// Guaranteed to emit a release barrier.
|
||||
inline void ResetAndRelease()
|
||||
{
|
||||
return Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(&m_EventSemaphoreData);
|
||||
}
|
||||
|
||||
private:
|
||||
Baselib_EventSemaphore m_EventSemaphoreData;
|
||||
};
|
||||
}
|
||||
}
|
||||
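A usage sketch of the EventSemaphore above: worker threads block in Acquire() until a setup thread calls Set(). It assumes the include path from this diff and the usual baselib::EventSemaphore visibility; the "setup" and "work" steps are placeholders.

#include "Libraries/external/baselib/Include/Cpp/EventSemaphore.h"  // assumed include path
#include <thread>
#include <vector>

void RunWorkersAfterSetup()
{
    baselib::EventSemaphore ready;           // initial state is unset
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i)
        workers.emplace_back([&ready] {
            ready.Acquire();                 // blocks until Set() below
            // ... do work that depends on the completed setup ...
        });

    // ... perform setup on this thread ...
    ready.Set();                             // wakes all waiting workers and lets future Acquire() calls through

    for (auto& t : workers)
        t.join();
}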
104
Libraries/external/baselib/Include/Cpp/HighCapacitySemaphore.h
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_HighCapacitySemaphore.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// baselib::HighCapacitySemaphore is similar to baselib::Semaphore but allows for far greater token count.
|
||||
// It is suitable to be used as resource counting semaphore.
|
||||
class HighCapacitySemaphore
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
HighCapacitySemaphore(const HighCapacitySemaphore& other) = delete;
|
||||
HighCapacitySemaphore& operator=(const HighCapacitySemaphore& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
HighCapacitySemaphore(HighCapacitySemaphore&& other) = delete;
|
||||
HighCapacitySemaphore& operator=(HighCapacitySemaphore&& other) = delete;
|
||||
|
||||
// This is the max number of tokens guaranteed to be held by the semaphore at
|
||||
// any given point in time. Tokens submitted that exceed this value may silently
|
||||
// be discarded.
|
||||
enum : int64_t { MaxGuaranteedCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount };
|
||||
|
||||
// Creates a counting semaphore synchronization primitive.
|
||||
// If there are not enough system resources to create a semaphore, process abort is triggered.
|
||||
HighCapacitySemaphore() : m_SemaphoreData(Baselib_HighCapacitySemaphore_Create())
|
||||
{
|
||||
}
|
||||
|
||||
// Reclaim resources and memory held by the semaphore.
|
||||
//
|
||||
// If threads are waiting on the semaphore, destructor will trigger an assert and may cause process abort.
|
||||
~HighCapacitySemaphore()
|
||||
{
|
||||
Baselib_HighCapacitySemaphore_Free(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
// Wait for semaphore token to become available
|
||||
//
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Acquire()
|
||||
{
|
||||
return Baselib_HighCapacitySemaphore_Acquire(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
// Try to consume a token and return immediately.
|
||||
//
|
||||
// When successful this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Return: true if token was consumed. false if not.
|
||||
inline bool TryAcquire()
|
||||
{
|
||||
return Baselib_HighCapacitySemaphore_TryAcquire(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
// Wait for semaphore token to become available
|
||||
//
|
||||
// When successful this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TryTimedAcquire with a zero timeout differs from TryAcquire() in that TryAcquire() is guaranteed to be a user space operation
// while TryTimedAcquire with a zero timeout may enter the kernel and cause a context switch.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Arguments:
|
||||
// - timeout: Time to wait for token to become available.
|
||||
//
|
||||
// Return: true if token was consumed. false if timeout was reached.
|
||||
inline bool TryTimedAcquire(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
return Baselib_HighCapacitySemaphore_TryTimedAcquire(&m_SemaphoreData, timeoutInMilliseconds.count());
|
||||
}
|
||||
|
||||
// Submit tokens to the semaphore.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// Increase the number of available tokens on the semaphore by `count`. Any waiting threads will be notified there are new tokens available.
|
||||
// If the token count reaches `Baselib_HighCapacitySemaphore_MaxGuaranteedCount` this function may silently discard any overflow.
|
||||
inline void Release(uint32_t count)
|
||||
{
|
||||
return Baselib_HighCapacitySemaphore_Release(&m_SemaphoreData, count);
|
||||
}
|
||||
|
||||
// Sets the semaphore token count to zero and release all waiting threads.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// Return: number of released threads.
|
||||
inline uint64_t ResetAndReleaseWaitingThreads()
|
||||
{
|
||||
return Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
private:
|
||||
Baselib_HighCapacitySemaphore m_SemaphoreData;
|
||||
};
|
||||
}
|
||||
}
|
||||
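A short sketch of HighCapacitySemaphore used as a resource counter, as the class comment suggests; include path and slot semantics are illustrative assumptions.

#include "Libraries/external/baselib/Include/Cpp/HighCapacitySemaphore.h"  // assumed include path

void CountResources()
{
    baselib::HighCapacitySemaphore freeSlots;  // starts with zero tokens
    freeSlots.Release(1024);                   // publish 1024 available slots

    // A consumer takes one slot; Acquire() blocks until a token is available.
    freeSlots.Acquire();
    // ... use the slot ...
    freeSlots.Release(1);                      // hand the slot back
}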
16
Libraries/external/baselib/Include/Cpp/Internal/Algorithm.inl.h
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace Algorithm
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template<typename T>
|
||||
static FORCE_INLINE constexpr T LogicalOrRShiftOp(T value, int shift) { return value | (value >> shift); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
63
Libraries/external/baselib/Include/Cpp/Internal/Compiler/ClangOrGcc/AlgorithmClangOrGcc.inl.h
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
#pragma once
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace Algorithm
|
||||
{
|
||||
inline int HighestBitNonZero(uint32_t value)
|
||||
{
|
||||
return 31 - __builtin_clz(value);
|
||||
}
|
||||
|
||||
inline int HighestBitNonZero(uint64_t value)
|
||||
{
|
||||
#if PLATFORM_ARCH_64
|
||||
return 63 - __builtin_clzll(value);
|
||||
#else
|
||||
return (value & 0xffffffff00000000ULL) ? (63 - __builtin_clz((uint32_t)(value >> 32))) : (31 - __builtin_clz((uint32_t)value));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int HighestBit(uint32_t value)
|
||||
{
|
||||
return value == 0 ? -1 : HighestBitNonZero(value);
|
||||
}
|
||||
|
||||
inline int HighestBit(uint64_t value)
|
||||
{
|
||||
return value == 0 ? -1 : HighestBitNonZero(value);
|
||||
}
|
||||
|
||||
inline int LowestBitNonZero(uint32_t value)
|
||||
{
|
||||
return __builtin_ctz(value);
|
||||
}
|
||||
|
||||
inline int LowestBitNonZero(uint64_t value)
|
||||
{
|
||||
#if PLATFORM_ARCH_64
|
||||
return __builtin_ctzll(value);
|
||||
#else
|
||||
return (value & 0x00000000ffffffffULL) ? __builtin_ctz((uint32_t)(value)) : (32 + __builtin_ctz((uint32_t)(value >> 32)));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int LowestBit(uint32_t value)
|
||||
{
|
||||
return value == 0 ? -1 : LowestBitNonZero(value);
|
||||
}
|
||||
|
||||
inline int LowestBit(uint64_t value)
|
||||
{
|
||||
return value == 0 ? -1 : LowestBitNonZero(value);
|
||||
}
|
||||
|
||||
inline int BitsInMask(uint64_t mask) { return __builtin_popcountll(mask); }
|
||||
inline int BitsInMask(uint32_t mask) { return __builtin_popcount(mask); }
|
||||
inline int BitsInMask(uint16_t mask) { return BitsInMask((uint32_t)mask); }
|
||||
inline int BitsInMask(uint8_t mask) { return BitsInMask((uint32_t)mask); }
|
||||
}
|
||||
}
|
||||
}
|
||||
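A few concrete checks of the bit-scan and popcount helpers declared in Algorithm.h and implemented above; this is a standalone sketch (assumed include path), not part of the headers.

#include "Libraries/external/baselib/Include/Cpp/Algorithm.h"  // assumed include path
#include <cassert>
#include <cstdint>

void BitScanExamples()
{
    using namespace baselib::Algorithm;
    assert(HighestBit(uint32_t(0)) == -1);            // no bits set -> -1
    assert(HighestBit(uint32_t(0x80000001u)) == 31);  // most significant set bit
    assert(LowestBit(uint32_t(0x80000001u)) == 0);    // least significant set bit
    assert(HighestBitNonZero(uint32_t(1)) == 0);      // only meaningful for non-zero input
    assert(LowestBitNonZero(uint64_t(1) << 40) == 40);
    assert(BitsInMask(uint32_t(0xF0F0u)) == 8);       // population count
}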
131
Libraries/external/baselib/Include/Cpp/Internal/Compiler/Msvc/AlgorithmMsvc.inl.h
vendored
Normal file
@@ -0,0 +1,131 @@
|
||||
#pragma once
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
#pragma intrinsic(_BitScanReverse)
|
||||
#if PLATFORM_ARCH_64
|
||||
#pragma intrinsic(_BitScanReverse64)
|
||||
#endif
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace Algorithm
|
||||
{
|
||||
inline int HighestBit(uint32_t value)
|
||||
{
|
||||
unsigned long res;
|
||||
return _BitScanReverse(&res, value) ? (int)res : -1;
|
||||
}
|
||||
|
||||
inline int HighestBit(uint64_t value)
|
||||
{
|
||||
#if PLATFORM_ARCH_64
|
||||
unsigned long res;
|
||||
return _BitScanReverse64(&res, value) ? (int)res : -1;
|
||||
#else
|
||||
unsigned long lower, upper;
|
||||
int lower_int = _BitScanReverse(&lower, (uint32_t)value) ? (int)lower : -1;
|
||||
return _BitScanReverse(&upper, (uint32_t)(value >> 32)) ? (int)(32 + upper) : lower_int;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int HighestBitNonZero(uint32_t value)
|
||||
{
|
||||
unsigned long res = 0;
|
||||
_BitScanReverse(&res, value);
|
||||
return (int)res;
|
||||
}
|
||||
|
||||
inline int HighestBitNonZero(uint64_t value)
|
||||
{
|
||||
#if PLATFORM_ARCH_64
|
||||
unsigned long res = 0;
|
||||
_BitScanReverse64(&res, value);
|
||||
return (int)res;
|
||||
#else
|
||||
unsigned long lower, upper;
|
||||
_BitScanReverse(&lower, (uint32_t)value);
|
||||
return _BitScanReverse(&upper, (uint32_t)(value >> 32)) ? (32 + upper) : lower;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int LowestBit(uint32_t value)
|
||||
{
|
||||
unsigned long res;
|
||||
return _BitScanForward(&res, value) ? (int)res : -1;
|
||||
}
|
||||
|
||||
inline int LowestBit(uint64_t value)
|
||||
{
|
||||
#if PLATFORM_ARCH_64
|
||||
unsigned long res;
|
||||
return _BitScanForward64(&res, value) ? (int)res : -1;
|
||||
#else
|
||||
unsigned long lower, upper;
|
||||
int upper_int = _BitScanForward(&upper, (uint32_t)(value >> 32)) ? (int)upper : -33;
|
||||
return _BitScanForward(&lower, (uint32_t)(value)) ? (int)lower : (32 + upper_int);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int LowestBitNonZero(uint32_t value)
|
||||
{
|
||||
unsigned long res = 0;
|
||||
_BitScanForward(&res, value);
|
||||
return (int)res;
|
||||
}
|
||||
|
||||
inline int LowestBitNonZero(uint64_t value)
|
||||
{
|
||||
#if PLATFORM_ARCH_64
|
||||
unsigned long res = 0;
|
||||
_BitScanForward64(&res, value);
|
||||
return (int)res;
|
||||
#else
|
||||
unsigned long lower, upper;
|
||||
_BitScanForward(&upper, (uint32_t)(value >> 32));
|
||||
return _BitScanForward(&lower, (uint32_t)(value)) ? (int)lower : (int)(32 + upper);
|
||||
#endif
|
||||
}
|
||||
|
||||
// __popcnt/__popcnt16/__popcnt64 were introduced as part of SSE4a
|
||||
// See https://en.wikipedia.org/wiki/SSE4#POPCNT_and_LZCNT
|
||||
// To check this accurately, we would need to check cpuid which itself is not for free.
|
||||
// However, compiling for some hardware, MSVC defines __AVX__ which is a superset of SSE4 so we can use that.
|
||||
// (as of writing there's no equivalent __SSE4__)
|
||||
#if defined(__AVX__)
|
||||
#ifdef _AMD64_
|
||||
inline int BitsInMask(uint64_t value) { return (int)__popcnt64(value); }
|
||||
#else
|
||||
inline int BitsInMask(uint64_t value) { return BitsInMask((uint32_t)value) + BitsInMask((uint32_t)(value >> 32)); }
|
||||
#endif
|
||||
inline int BitsInMask(uint32_t value) { return (int)__popcnt(value); }
|
||||
inline int BitsInMask(uint16_t value) { return (int)__popcnt16(value); }
|
||||
inline int BitsInMask(uint8_t value) { return BitsInMask((uint16_t)value); }
|
||||
|
||||
// Todo: Consider using VCNT instruction on arm (NEON)
|
||||
#else
|
||||
inline int BitsInMask(uint64_t value)
|
||||
{
|
||||
// From http://www-graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
value = value - ((value >> 1) & (uint64_t) ~(uint64_t)0 / 3);
|
||||
value = (value & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((value >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3);
|
||||
value = (value + (value >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15;
|
||||
return (uint64_t)(value * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * 8;
|
||||
}
|
||||
|
||||
inline int BitsInMask(uint32_t value)
|
||||
{
|
||||
// From http://www-graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
value = value - ((value >> 1) & 0x55555555);
|
||||
value = (value & 0x33333333) + ((value >> 2) & 0x33333333);
|
||||
return (((value + (value >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
|
||||
}
|
||||
|
||||
inline int BitsInMask(uint16_t value) { return BitsInMask((uint32_t)value); }
|
||||
inline int BitsInMask(uint8_t value) { return BitsInMask((uint32_t)value); }
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
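The #else branch above falls back to the classic SWAR popcount from the Stanford bit hacks page. The following self-contained copy (not part of the header) spells out the three steps of the 32-bit variant and cross-checks them against a naive loop.

#include <cassert>
#include <cstdint>

inline int PopCount32(uint32_t value)
{
    value = value - ((value >> 1) & 0x55555555);                  // each 2-bit group now holds its own bit count
    value = (value & 0x33333333) + ((value >> 2) & 0x33333333);   // each 4-bit group holds its bit count
    // Fold nibbles into bytes, then use the multiply to sum all bytes into the top byte and extract it.
    return (((value + (value >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}

inline void PopCountCheck()
{
    for (uint32_t v : { 0u, 1u, 0xF0F0u, 0xFFFFFFFFu })
    {
        int naive = 0;
        for (uint32_t x = v; x != 0; x >>= 1)
            naive += int(x & 1);
        assert(PopCount32(v) == naive);
    }
}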
38
Libraries/external/baselib/Include/Cpp/Internal/ConditionVariableData_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Atomic.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
struct ConditionVariableData
|
||||
{
|
||||
atomic<int32_t> waiters;
|
||||
atomic<int32_t> wakeups;
|
||||
|
||||
ConditionVariableData() : waiters(0), wakeups(0) {}
|
||||
|
||||
inline bool HasWaiters() const
|
||||
{
|
||||
return waiters.load(memory_order_acquire) > 0;
|
||||
}
|
||||
|
||||
inline bool TryConsumeWakeup()
|
||||
{
|
||||
int32_t previousCount = wakeups.load(memory_order_relaxed);
|
||||
while (previousCount > 0)
|
||||
{
|
||||
if (wakeups.compare_exchange_weak(previousCount, previousCount - 1, memory_order_acquire, memory_order_relaxed))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
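TryConsumeWakeup above implements a lock-free "consume one token if any are available" loop. The sketch below restates the same pattern with std::atomic so it can be read and tried outside of baselib; the producer side is assumed to add tokens with a release-ordered fetch_add.

#include <atomic>
#include <cstdint>

inline bool TryConsumeToken(std::atomic<int32_t>& tokens)
{
    int32_t previous = tokens.load(std::memory_order_relaxed);
    while (previous > 0)
    {
        // On success the acquire ordering pairs with the release of the producer that published the token.
        if (tokens.compare_exchange_weak(previous, previous - 1,
                                         std::memory_order_acquire, std::memory_order_relaxed))
            return true;
        // On failure compare_exchange_weak reloads `previous`, so the loop re-checks the current count.
    }
    return false;   // no token available
}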
26
Libraries/external/baselib/Include/Cpp/Internal/ConditionVariableData_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Atomic.h"
|
||||
#include "../Semaphore.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
struct ConditionVariableData
|
||||
{
|
||||
Semaphore semaphore;
|
||||
atomic<uint32_t> waiters;
|
||||
|
||||
ConditionVariableData() : semaphore(), waiters(0) {}
|
||||
|
||||
inline bool HasWaiters() const
|
||||
{
|
||||
return waiters.load(memory_order_acquire) > 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
86
Libraries/external/baselib/Include/Cpp/Internal/ConditionVariable_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
#pragma once
|
||||
|
||||
#include "../CountdownTimer.h"
|
||||
#include "../../C/Baselib_SystemFutex.h"
|
||||
#include "../../C/Baselib_Thread.h"
|
||||
|
||||
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "Only use this implementation on top of a proper futex, in all other situations us ConditionVariable_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
inline void ConditionVariable::Wait()
|
||||
{
|
||||
m_Data.waiters.fetch_add(1, memory_order_relaxed);
|
||||
m_Lock.Release();
|
||||
while (!m_Data.TryConsumeWakeup())
|
||||
{
|
||||
Baselib_SystemFutex_Wait(&m_Data.wakeups.obj, 0, std::numeric_limits<uint32_t>::max());
|
||||
}
|
||||
m_Lock.Acquire();
|
||||
}
|
||||
|
||||
inline bool ConditionVariable::TimedWait(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
m_Data.waiters.fetch_add(1, memory_order_relaxed);
|
||||
m_Lock.Release();
|
||||
|
||||
uint32_t timeLeft = timeoutInMilliseconds.count();
|
||||
auto timer = CountdownTimer::StartNew(timeoutInMilliseconds);
|
||||
do
|
||||
{
|
||||
Baselib_SystemFutex_Wait(&m_Data.wakeups.obj, 0, timeLeft);
|
||||
if (m_Data.TryConsumeWakeup())
|
||||
{
|
||||
m_Lock.Acquire();
|
||||
return true;
|
||||
}
|
||||
timeLeft = timer.GetTimeLeftInMilliseconds().count();
|
||||
}
|
||||
while (timeLeft);
|
||||
|
||||
do
|
||||
{
|
||||
int32_t waiters = m_Data.waiters.load(memory_order_relaxed);
|
||||
while (waiters > 0)
|
||||
{
|
||||
if (m_Data.waiters.compare_exchange_weak(waiters, waiters - 1, memory_order_relaxed, memory_order_relaxed))
|
||||
{
|
||||
m_Lock.Acquire();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Baselib_Thread_YieldExecution();
|
||||
}
|
||||
while (!m_Data.TryConsumeWakeup());
|
||||
|
||||
m_Lock.Acquire();
|
||||
return true;
|
||||
}
|
||||
|
||||
inline void ConditionVariable::Notify(uint16_t count)
|
||||
{
|
||||
int32_t waitingThreads = m_Data.waiters.load(memory_order_acquire);
|
||||
do
|
||||
{
|
||||
int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
|
||||
if (threadsToWakeup == 0)
|
||||
{
|
||||
atomic_thread_fence(memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_Data.waiters.compare_exchange_weak(waitingThreads, waitingThreads - threadsToWakeup, memory_order_relaxed, memory_order_relaxed))
|
||||
{
|
||||
m_Data.wakeups.fetch_add(threadsToWakeup, memory_order_release);
|
||||
Baselib_SystemFutex_Notify(&m_Data.wakeups.obj, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
|
||||
return;
|
||||
}
|
||||
}
|
||||
while (waitingThreads > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
61
Libraries/external/baselib/Include/Cpp/Internal/ConditionVariable_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
inline void ConditionVariable::Wait()
|
||||
{
|
||||
m_Data.waiters.fetch_add(1, memory_order_relaxed);
|
||||
m_Lock.Release();
|
||||
m_Data.semaphore.Acquire();
|
||||
m_Lock.Acquire();
|
||||
}
|
||||
|
||||
inline bool ConditionVariable::TimedWait(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
m_Data.waiters.fetch_add(1, memory_order_relaxed);
|
||||
m_Lock.Release();
|
||||
|
||||
bool acquired = m_Data.semaphore.TryTimedAcquire(timeoutInMilliseconds);
|
||||
|
||||
if (acquired)
|
||||
{
|
||||
m_Lock.Acquire();
|
||||
return true;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
uint32_t waiters = m_Data.waiters.load(memory_order_relaxed);
|
||||
while (waiters > 0)
|
||||
{
|
||||
if (m_Data.waiters.compare_exchange_weak(waiters, waiters - 1, memory_order_relaxed, memory_order_relaxed))
|
||||
{
|
||||
m_Lock.Acquire();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Baselib_Thread_YieldExecution();
|
||||
}
|
||||
while (!m_Data.semaphore.TryAcquire());
|
||||
|
||||
m_Lock.Acquire();
|
||||
return true;
|
||||
}
|
||||
|
||||
inline void ConditionVariable::Notify(uint16_t count)
|
||||
{
|
||||
uint32_t waitingThreads, threadsToWakeup;
|
||||
do
|
||||
{
|
||||
waitingThreads = m_Data.waiters.load(memory_order_acquire);
|
||||
threadsToWakeup = count < waitingThreads ? count : waitingThreads;
|
||||
if (threadsToWakeup == 0)
|
||||
return;
|
||||
}
|
||||
while (!m_Data.waiters.compare_exchange_weak(waitingThreads, waitingThreads - threadsToWakeup, memory_order_relaxed, memory_order_relaxed));
|
||||
m_Data.semaphore.Release(threadsToWakeup);
|
||||
}
|
||||
}
|
||||
}
|
||||
22
Libraries/external/baselib/Include/Cpp/Internal/TypeTraits.h
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// workaround for missing std::is_trivially_copyable
|
||||
// this can't be put inside compiler env due to __GLIBCXX__ not being set at that point
|
||||
#if (defined(__GLIBCXX__) && __GLIBCXX__ <= 20150623) || (COMPILER_GCC && __GNUC__ < 5)
|
||||
template<typename T> struct is_trivially_copyable : std::has_trivial_copy_constructor<T> {};
|
||||
#else
|
||||
template<typename T> struct is_trivially_copyable : std::is_trivially_copyable<T> {};
|
||||
#endif
|
||||
|
||||
template<typename T, size_t S> struct is_trivial_of_size : std::integral_constant<bool, is_trivially_copyable<T>::value && (sizeof(T) == S)> {};
|
||||
template<typename T, size_t S> struct is_integral_of_size : std::integral_constant<bool, std::is_integral<T>::value && (sizeof(T) == S)> {};
|
||||
|
||||
template<typename T, typename T2> struct is_of_same_signedness : std::integral_constant<bool, std::is_signed<T>::value == std::is_signed<T2>::value> {};
|
||||
}
|
||||
}
|
||||
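Compile-time checks illustrating the traits above; a sketch assuming the baselib configuration headers (which define BASELIB_CPP_INTERFACE) are pulled in before this header.

#include "Libraries/external/baselib/Include/Cpp/Internal/TypeTraits.h"  // assumed include path
#include <cstdint>

// Nothing here is evaluated at run time.
static_assert(baselib::is_trivially_copyable<uint32_t>::value, "uint32_t is trivially copyable");
static_assert(baselib::is_trivial_of_size<uint32_t, 4>::value, "trivially copyable and exactly 4 bytes");
static_assert(baselib::is_integral_of_size<uint64_t, 8>::value, "integral and exactly 8 bytes");
static_assert(baselib::is_of_same_signedness<int32_t, int64_t>::value, "both signed");
static_assert(!baselib::is_of_same_signedness<int32_t, uint32_t>::value, "signed vs unsigned");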
129
Libraries/external/baselib/Include/Cpp/Internal/heap_allocator.inl.h
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../C/Baselib_Memory.h"
|
||||
|
||||
// Internal, to enable override of default C Api implementation for unit-tests
|
||||
#ifndef detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL
|
||||
#define detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL 0
|
||||
#endif
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
// Default memory allocation methods
|
||||
struct heap_allocator_impl
|
||||
{
|
||||
static constexpr auto Baselib_Memory_Allocate = ::Baselib_Memory_Allocate;
|
||||
static constexpr auto Baselib_Memory_Reallocate = ::Baselib_Memory_Reallocate;
|
||||
static constexpr auto Baselib_Memory_Free = ::Baselib_Memory_Free;
|
||||
static constexpr auto Baselib_Memory_AlignedAllocate = ::Baselib_Memory_AlignedAllocate;
|
||||
static constexpr auto Baselib_Memory_AlignedReallocate = ::Baselib_Memory_AlignedReallocate;
|
||||
static constexpr auto Baselib_Memory_AlignedFree = ::Baselib_Memory_AlignedFree;
|
||||
};
|
||||
|
||||
// Test memory allocation methods
|
||||
struct heap_allocator_impl_test
|
||||
{
|
||||
static void* Baselib_Memory_Allocate(size_t);
|
||||
static void* Baselib_Memory_Reallocate(void*, size_t);
|
||||
static void Baselib_Memory_Free(void*);
|
||||
static void* Baselib_Memory_AlignedAllocate(size_t, size_t);
|
||||
static void* Baselib_Memory_AlignedReallocate(void*, size_t, size_t);
|
||||
static void Baselib_Memory_AlignedFree(void*);
|
||||
};
|
||||
|
||||
template<uint32_t alignment>
|
||||
class heap_allocator
|
||||
{
|
||||
// Use test memory allocation implementation if detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL is true, otherwise Baselib_Memory_*
|
||||
using BaseImpl = typename std::conditional<detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL, heap_allocator_impl_test, heap_allocator_impl>::type;
|
||||
|
||||
// Memory allocation functions - alignment requirements <= Baselib_Memory_MinGuaranteedAlignment
|
||||
struct MinAlignedImpl
|
||||
{
|
||||
static void* allocate(size_t size, Baselib_ErrorState *error_state_ptr)
|
||||
{
|
||||
UNUSED(error_state_ptr);
|
||||
return BaseImpl::Baselib_Memory_Allocate(size);
|
||||
}
|
||||
|
||||
static void* reallocate(void* ptr, size_t old_size, size_t new_size, Baselib_ErrorState *error_state_ptr)
|
||||
{
|
||||
UNUSED(error_state_ptr);
|
||||
UNUSED(old_size);
|
||||
return BaseImpl::Baselib_Memory_Reallocate(ptr, new_size);
|
||||
}
|
||||
|
||||
static bool deallocate(void* ptr, size_t size, Baselib_ErrorState *error_state_ptr)
|
||||
{
|
||||
UNUSED(error_state_ptr);
|
||||
UNUSED(size);
|
||||
BaseImpl::Baselib_Memory_Free(ptr);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Aligned memory allocation functions - alignment requirements > Baselib_Memory_MinGuaranteedAlignment
|
||||
struct AlignedImpl
|
||||
{
|
||||
static void* allocate(size_t size, Baselib_ErrorState *error_state_ptr)
|
||||
{
|
||||
UNUSED(error_state_ptr);
|
||||
return BaseImpl::Baselib_Memory_AlignedAllocate(size, alignment);
|
||||
}
|
||||
|
||||
static void* reallocate(void* ptr, size_t old_size, size_t new_size, Baselib_ErrorState *error_state_ptr)
|
||||
{
|
||||
UNUSED(error_state_ptr);
|
||||
UNUSED(old_size);
|
||||
return BaseImpl::Baselib_Memory_AlignedReallocate(ptr, new_size, alignment);
|
||||
}
|
||||
|
||||
static bool deallocate(void* ptr, size_t size, Baselib_ErrorState *error_state_ptr)
|
||||
{
|
||||
UNUSED(error_state_ptr);
|
||||
UNUSED(size);
|
||||
BaseImpl::Baselib_Memory_AlignedFree(ptr);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
static FORCE_INLINE constexpr size_t AlignedSize(size_t size)
|
||||
{
|
||||
return (size + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr size_t max_alignment = Baselib_Memory_MaxAlignment;
|
||||
|
||||
static constexpr size_t optimal_size(size_t size)
|
||||
{
|
||||
return AlignedSize(size);
|
||||
}
|
||||
|
||||
// Use aligned memory allocations methods if alignment > Baselib_Memory_MinGuaranteedAlignment
|
||||
using Impl = typename std::conditional<(alignment > Baselib_Memory_MinGuaranteedAlignment), AlignedImpl, MinAlignedImpl>::type;
|
||||
|
||||
static void* allocate(size_t size, Baselib_ErrorState* error_state_ptr)
|
||||
{
|
||||
return Impl::allocate(size, error_state_ptr);
|
||||
}
|
||||
|
||||
static void* reallocate(void* ptr, size_t old_size, size_t new_size, Baselib_ErrorState* error_state_ptr)
|
||||
{
|
||||
return Impl::reallocate(ptr, old_size, new_size, error_state_ptr);
|
||||
}
|
||||
|
||||
static bool deallocate(void* ptr, size_t size, Baselib_ErrorState* error_state_ptr)
|
||||
{
|
||||
return Impl::deallocate(ptr, size, error_state_ptr);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL
|
||||
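A usage sketch for the internal heap_allocator above. The include path, zero-initialization of Baselib_ErrorState, and the assumption that 64 exceeds Baselib_Memory_MinGuaranteedAlignment (so the aligned path is taken) are all assumptions, not guarantees from this commit.

#include "Libraries/external/baselib/Include/Cpp/Internal/heap_allocator.inl.h"  // assumed include path

void HeapAllocatorExample()
{
    using Alloc = baselib::detail::heap_allocator<64>;   // request 64-byte alignment

    Baselib_ErrorState errorState = {};                  // zero-initialized; exact init convention is an assumption
    const size_t size = 100;
    const size_t padded = Alloc::optimal_size(size);     // rounds 100 up to 128 for 64-byte alignment

    void* p = Alloc::allocate(padded, &errorState);
    if (p)
    {
        // ... use the block ...
        Alloc::deallocate(p, padded, &errorState);
    }
}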
95
Libraries/external/baselib/Include/Cpp/Internal/page_allocator.inl.h
vendored
Normal file
@@ -0,0 +1,95 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../C/Baselib_Memory.h"
|
||||
#include "../../Cpp/Algorithm.h"
|
||||
|
||||
// Internal, to enable override of default C Api implementation for unit-tests
|
||||
#ifndef detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL
|
||||
#define detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL 0
|
||||
#endif
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
// Default memory allocation methods
|
||||
struct page_allocator_impl
|
||||
{
|
||||
static constexpr auto Baselib_Memory_AllocatePages = ::Baselib_Memory_AllocatePages;
|
||||
static constexpr auto Baselib_Memory_ReleasePages = ::Baselib_Memory_ReleasePages;
|
||||
static constexpr auto Baselib_Memory_SetPageState = ::Baselib_Memory_SetPageState;
|
||||
};
|
||||
|
||||
// Test memory allocation methods
|
||||
struct page_allocator_impl_test
|
||||
{
|
||||
static Baselib_Memory_PageAllocation Baselib_Memory_AllocatePages(uint64_t pageSize, uint64_t pageCount, uint64_t alignmentInMultipleOfPageSize, Baselib_Memory_PageState pageState, Baselib_ErrorState* errorState);
|
||||
static void Baselib_Memory_ReleasePages(Baselib_Memory_PageAllocation pageAllocation, Baselib_ErrorState* errorState);
|
||||
static void Baselib_Memory_SetPageState(void* addressOfFirstPage, uint64_t pageSize, uint64_t pageCount, Baselib_Memory_PageState pageState, Baselib_ErrorState* errorState);
|
||||
};
|
||||
|
||||
typedef enum Memory_PageState : int
|
||||
{
|
||||
Memory_PageState_Reserved = Baselib_Memory_PageState_Reserved,
|
||||
Memory_PageState_NoAccess = Baselib_Memory_PageState_NoAccess,
|
||||
Memory_PageState_ReadOnly = Baselib_Memory_PageState_ReadOnly,
|
||||
Memory_PageState_ReadWrite = Baselib_Memory_PageState_ReadWrite,
|
||||
Memory_PageState_ReadOnly_Executable = Baselib_Memory_PageState_ReadOnly_Executable | Baselib_Memory_PageState_ReadOnly,
|
||||
Memory_PageState_ReadWrite_Executable = Baselib_Memory_PageState_ReadWrite_Executable | Baselib_Memory_PageState_ReadWrite,
|
||||
} Memory_PageState;
|
||||
|
||||
template<uint32_t alignment>
|
||||
class page_allocator
|
||||
{
|
||||
// Use test memory allocation implementation if detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL is true, otherwise Baselib_Memory_*
|
||||
using Impl = typename std::conditional<detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL, page_allocator_impl_test, page_allocator_impl>::type;
|
||||
|
||||
const size_t m_PageSize;
|
||||
const size_t m_PageSizeAligned;
|
||||
|
||||
FORCE_INLINE constexpr size_t PagedCountFromSize(size_t size) const
|
||||
{
|
||||
return (size + (m_PageSize - 1)) / m_PageSize;
|
||||
}
|
||||
|
||||
FORCE_INLINE size_t DefaultPageSize() const
|
||||
{
|
||||
Baselib_Memory_PageSizeInfo info;
|
||||
Baselib_Memory_GetPageSizeInfo(&info);
|
||||
return static_cast<size_t>(info.defaultPageSize);
|
||||
}
|
||||
|
||||
public:
|
||||
page_allocator() : page_allocator(DefaultPageSize()) {}
|
||||
page_allocator(size_t page_size) : m_PageSize(page_size), m_PageSizeAligned(page_size > alignment ? page_size : alignment) {}
|
||||
|
||||
void* allocate(size_t size, int state, Baselib_ErrorState *error_state_ptr) const
|
||||
{
|
||||
Baselib_Memory_PageAllocation pa = Impl::Baselib_Memory_AllocatePages(m_PageSize, PagedCountFromSize(size), m_PageSizeAligned / m_PageSize, (Baselib_Memory_PageState)state, error_state_ptr);
|
||||
return pa.ptr;
|
||||
}
|
||||
|
||||
bool deallocate(void* ptr, size_t size, Baselib_ErrorState *error_state_ptr) const
|
||||
{
|
||||
Impl::Baselib_Memory_ReleasePages({ptr, m_PageSize, PagedCountFromSize(size)}, error_state_ptr);
|
||||
return (error_state_ptr->code == Baselib_ErrorCode_Success);
|
||||
}
|
||||
|
||||
constexpr size_t optimal_size(size_t size) const
|
||||
{
|
||||
return (size + m_PageSizeAligned - 1) & ~(m_PageSizeAligned - 1);
|
||||
}
|
||||
|
||||
bool set_page_state(void* ptr, size_t size, int state, Baselib_ErrorState *error_state_ptr) const
|
||||
{
|
||||
Impl::Baselib_Memory_SetPageState(ptr, m_PageSize, PagedCountFromSize(size), (Baselib_Memory_PageState)state, error_state_ptr);
|
||||
return (error_state_ptr->code == Baselib_ErrorCode_Success);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL
|
||||
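A sketch of the internal page_allocator above: reserve read/write pages, flip them to read-only once populated, then release them. Include path and Baselib_ErrorState initialization are assumptions.

#include "Libraries/external/baselib/Include/Cpp/Internal/page_allocator.inl.h"  // assumed include path

void PageAllocatorExample()
{
    using namespace baselib::detail;

    page_allocator<4096> pages;          // alignment template parameter; page size is queried from the OS
    Baselib_ErrorState errorState = {};  // zero-initialized; exact init convention is an assumption

    const size_t size = 100 * 1024;
    void* p = pages.allocate(size, Memory_PageState_ReadWrite, &errorState);
    if (p)
    {
        // Make the pages read-only once their contents are final.
        pages.set_page_state(p, size, Memory_PageState_ReadOnly, &errorState);
        pages.deallocate(p, size, &errorState);
    }
}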
365
Libraries/external/baselib/Include/Cpp/Internal/tlsf_allocator.inl.h
vendored
Normal file
@@ -0,0 +1,365 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Lock.h"
|
||||
#include "../mpmc_node_queue.h"
|
||||
#include "../Algorithm.h"
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include <cstring>
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template<class Allocator>
|
||||
class tlsf_block_allocator
|
||||
{
|
||||
baselib::Lock m_CapacityLock;
|
||||
ALIGNED_ATOMIC(size_t) m_Capacity;
|
||||
baselib::mpmc_node_queue<baselib::mpmc_node> m_FreeBlocks;
|
||||
|
||||
struct Segment
|
||||
{
|
||||
uintptr_t data;
|
||||
size_t size;
|
||||
Segment *next;
|
||||
} *m_Segments;
|
||||
|
||||
void LinkSegment(Segment* segment, const size_t block_size, size_t block_count)
|
||||
{
|
||||
uintptr_t nodeData = segment->data;
|
||||
baselib::mpmc_node* firstNode = reinterpret_cast<baselib::mpmc_node*>(nodeData);
|
||||
baselib::mpmc_node* node = firstNode;
|
||||
for (size_t i = 0; i < block_count; ++i)
|
||||
{
|
||||
node = reinterpret_cast<baselib::mpmc_node*>(nodeData);
|
||||
nodeData += block_size;
|
||||
node->next.obj = reinterpret_cast<baselib::mpmc_node*>(nodeData);
|
||||
}
|
||||
m_FreeBlocks.push_back(firstNode, node);
|
||||
}
|
||||
|
||||
bool ExpandCapacity(size_t size, size_t block_size, Allocator& allocator)
|
||||
{
|
||||
if (size == 0)
|
||||
return true;
|
||||
|
||||
// Align to the underlying allocator alignment. The size requested must also be at least block_size.
|
||||
block_size = baselib::Algorithm::CeilAligned(block_size, alignment);
|
||||
size = std::max(baselib::Algorithm::CeilAligned(size, alignment), block_size);
|
||||
|
||||
// Consider the base allocator's optimal size for the required size, i.e. if it is higher than the size requested, expand using the optimal size.
|
||||
const size_t minSize = size + sizeof(Segment);
|
||||
const size_t optimalSize = allocator.optimal_size(minSize);
|
||||
const size_t segment_size = std::max(optimalSize, minSize);
|
||||
const size_t block_count = size / block_size;
|
||||
|
||||
// Allocate one memory block that contains block data and Segment info.
|
||||
uintptr_t segmentMemory = reinterpret_cast<uintptr_t>(allocator.allocate(segment_size));
|
||||
if (segmentMemory == 0)
|
||||
return false;
|
||||
|
||||
// Store data ptr and size information in segment header
|
||||
Segment* segment = reinterpret_cast<Segment*>(segmentMemory + size);
|
||||
segment->data = segmentMemory;
|
||||
segment->size = segment_size;
|
||||
|
||||
// Link segment to existing segments and add capacity.
|
||||
// This function is in the scope of a locked `m_CapacityLock` which has an implicit acquire (lock) release (unlock) barrier.
|
||||
// Order of m_Segments and m_Capacity is irrelevant. Calling `allocate` from other threads may result in a successful allocation but
|
||||
// that is not a problem since this process repeats in the case of being called from `allocate` and the container is pre-empted.
|
||||
// The side effect of not
|
||||
segment->next = m_Segments;
|
||||
m_Segments = segment;
|
||||
LinkSegment(segment, block_size, block_count);
|
||||
baselib::atomic_fetch_add_explicit(m_Capacity, block_size * block_count, baselib::memory_order_relaxed);
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr uint32_t alignment = Allocator::alignment;
|
||||
|
||||
// non-copyable
|
||||
tlsf_block_allocator(const tlsf_block_allocator& other) = delete;
|
||||
tlsf_block_allocator& operator=(const tlsf_block_allocator& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
tlsf_block_allocator(tlsf_block_allocator&& other) = delete;
|
||||
tlsf_block_allocator& operator=(tlsf_block_allocator&& other) = delete;
|
||||
|
||||
tlsf_block_allocator() : m_CapacityLock(), m_Capacity(0), m_FreeBlocks(), m_Segments(nullptr) {}
|
||||
|
||||
void* allocate()
|
||||
{
|
||||
return m_FreeBlocks.try_pop_front();
|
||||
}
|
||||
|
||||
bool deallocate(void* ptr)
|
||||
{
|
||||
m_FreeBlocks.push_back(reinterpret_cast<baselib::mpmc_node*>(ptr));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool deallocate(void* ptr_first, void* ptr_last)
|
||||
{
|
||||
m_FreeBlocks.push_back(reinterpret_cast<baselib::mpmc_node*>(ptr_first), reinterpret_cast<baselib::mpmc_node*>(ptr_last));
|
||||
return true;
|
||||
}
|
||||
|
||||
void deallocate_segments(Allocator& allocator)
|
||||
{
|
||||
Segment *segment = m_Segments;
|
||||
while (segment)
|
||||
{
|
||||
Segment *nextSegment = segment->next;
|
||||
allocator.deallocate(reinterpret_cast<void *>(segment->data), segment->size);
|
||||
segment = nextSegment;
|
||||
}
|
||||
}
|
||||
|
||||
void reset_segments()
|
||||
{
|
||||
if (m_Segments)
|
||||
{
|
||||
m_Segments = nullptr;
|
||||
m_Capacity = 0;
|
||||
m_FreeBlocks.~mpmc_node_queue<baselib::mpmc_node>();
|
||||
new(&m_FreeBlocks) mpmc_node_queue<baselib::mpmc_node>();
|
||||
}
|
||||
}
|
||||
|
||||
bool reserve(size_t size, size_t capacity, Allocator& allocator)
|
||||
{
|
||||
bool result;
|
||||
m_CapacityLock.AcquireScoped([&] {
|
||||
result = capacity > m_Capacity ? ExpandCapacity(capacity - m_Capacity, size, allocator) : true;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
bool increase_capacity(size_t size, Allocator& allocator)
|
||||
{
|
||||
bool result = true;
|
||||
m_CapacityLock.AcquireScoped([&] {
|
||||
if (m_FreeBlocks.empty())
|
||||
result = ExpandCapacity(m_Capacity == 0 ? size : m_Capacity, size, allocator);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t capacity() const
|
||||
{
|
||||
return baselib::atomic_load_explicit(m_Capacity, baselib::memory_order_relaxed);
|
||||
}
|
||||
|
||||
static constexpr size_t optimal_size(const size_t size)
|
||||
{
|
||||
return baselib::Algorithm::CeilAligned(size, alignment);
|
||||
}
|
||||
};
|
||||
|
||||
template<size_t min_size, size_t max_size, size_t linear_subdivisions, class BaseAllocator>
|
||||
class tlsf_allocator : private BaseAllocator
|
||||
{
|
||||
using BlockAllocator = detail::tlsf_block_allocator<BaseAllocator>;
|
||||
|
||||
public:
|
||||
static constexpr uint32_t alignment = BaseAllocator::alignment;
|
||||
|
||||
// non-copyable
|
||||
tlsf_allocator(const tlsf_allocator& other) = delete;
|
||||
tlsf_allocator& operator=(const tlsf_allocator& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
tlsf_allocator(tlsf_allocator&& other) = delete;
|
||||
tlsf_allocator& operator=(tlsf_allocator&& other) = delete;
|
||||
|
||||
tlsf_allocator() : m_Allocators() {}
|
||||
~tlsf_allocator() { DeallocateSegmentsImpl(); }
|
||||
|
||||
void* try_allocate(size_t size)
|
||||
{
|
||||
return getAllocator(size).allocate();
|
||||
}
|
||||
|
||||
void* allocate(size_t size)
|
||||
{
|
||||
BlockAllocator& allocator = getAllocator(size);
|
||||
do
|
||||
{
|
||||
void* p;
|
||||
if (OPTIMIZER_LIKELY(p = allocator.allocate()))
|
||||
return p;
|
||||
if (!allocator.increase_capacity(AllocatorSize(size), static_cast<BaseAllocator&>(*this)))
|
||||
return nullptr;
|
||||
}
|
||||
while (true);
|
||||
}
|
||||
|
||||
void* try_reallocate(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
return ReallocateImpl<true>(ptr, old_size, new_size);
|
||||
}
|
||||
|
||||
void* reallocate(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
return ReallocateImpl<false>(ptr, old_size, new_size);
|
||||
}
|
||||
|
||||
bool deallocate(void* ptr, size_t size)
|
||||
{
|
||||
return ptr == nullptr ? true : getAllocator(size).deallocate(ptr);
|
||||
}
|
||||
|
||||
void deallocate_all()
|
||||
{
|
||||
atomic_thread_fence(memory_order_acquire);
|
||||
DeallocateSegmentsImpl();
|
||||
for (auto& pow2Allocators : m_Allocators)
|
||||
for (auto& blockAllocator : pow2Allocators)
|
||||
blockAllocator.reset_segments();
|
||||
atomic_thread_fence(memory_order_release);
|
||||
}
|
||||
|
||||
bool batch_deallocate(void* ptr_first, void* ptr_last, size_t size)
|
||||
{
|
||||
return ((ptr_first == nullptr) || (ptr_last == nullptr)) ? false : getAllocator(size).deallocate(ptr_first, ptr_last);
|
||||
}
|
||||
|
||||
void batch_deallocate_link(void* ptr, void* ptr_next)
|
||||
{
|
||||
reinterpret_cast<baselib::mpmc_node*>(ptr)->next = reinterpret_cast<baselib::mpmc_node*>(ptr_next);
|
||||
}
|
||||
|
||||
bool reserve(size_t size, size_t capacity)
|
||||
{
|
||||
return getAllocator(size).reserve(AllocatorSize(size), capacity, static_cast<BaseAllocator&>(*this));
|
||||
}
|
||||
|
||||
size_t capacity(size_t size)
|
||||
{
|
||||
return getAllocator(size).capacity();
|
||||
}
|
||||
|
||||
static constexpr size_t optimal_size(const size_t size)
|
||||
{
|
||||
return size == 0 ? 0 : BlockAllocator::optimal_size(AllocatorSize(size));
|
||||
}
|
||||
|
||||
private:
|
||||
struct CompileTime
|
||||
{
|
||||
static constexpr size_t Log2Base(size_t value, size_t offset) { return (value > 1) ? Log2Base(value >> (size_t)1, offset + 1) : offset; }
|
||||
static constexpr size_t Log2Base(size_t value) { return Log2Base(value, 0); }
|
||||
static constexpr size_t Max(size_t a, size_t b) { return a > b ? a : b; }
|
||||
};
|
||||
|
||||
static constexpr size_t m_MinSize = CompileTime::Max(min_size, CompileTime::Max(CompileTime::Max(sizeof(void*), linear_subdivisions), alignment));
|
||||
static constexpr size_t m_MinSizePow2 = baselib::Algorithm::CeilPowerOfTwo(m_MinSize);
|
||||
static constexpr size_t m_MaxSizePow2 = baselib::Algorithm::CeilPowerOfTwo(CompileTime::Max(max_size, m_MinSize));
|
||||
static constexpr size_t m_MinSizeMask = static_cast<size_t>(1) << CompileTime::Log2Base(m_MinSizePow2 - 1);
|
||||
static constexpr size_t m_AllocatorCount = (CompileTime::Log2Base(m_MaxSizePow2) - CompileTime::Log2Base(m_MinSizePow2)) + 1;
|
||||
static constexpr size_t m_AllocatorBaseOffsetLog2 = CompileTime::Log2Base(m_MinSizePow2) - 1;
|
||||
static constexpr size_t m_LinearSubdivisionsLog2 = CompileTime::Log2Base(linear_subdivisions);
|
||||
|
||||
static constexpr size_t AllocatorSizeLog2(size_t size) { return baselib::Algorithm::HighestBitNonZero(size | m_MinSizeMask); }
|
||||
static constexpr size_t LinearAllocatorSizeLog2(size_t size, size_t sizeLog2) { return (size & ((size_t)1 << sizeLog2) - 1) >> (sizeLog2 - m_LinearSubdivisionsLog2); }
|
||||
|
||||
template<int value = ((m_AllocatorCount == 1 && linear_subdivisions == 1) ? 1 : 2), typename std::enable_if<(value == 1), int>::type = 0>
|
||||
static constexpr FORCE_INLINE size_t AllocatorSize(size_t size)
|
||||
{
|
||||
return m_MinSizePow2;
|
||||
}
|
||||
|
||||
template<int value = ((m_AllocatorCount != 1 && linear_subdivisions == 1) ? 3 : 4), typename std::enable_if<(value == 3), int>::type = 0>
|
||||
static constexpr FORCE_INLINE size_t AllocatorSize(size_t size)
|
||||
{
|
||||
return (size_t)1 << (AllocatorSizeLog2(size - 1) + 1);
|
||||
}
|
||||
|
||||
template<int value = (linear_subdivisions == 1) ? 0 : 1, typename std::enable_if<(value), int>::type = 0>
|
||||
static FORCE_INLINE size_t AllocatorSize(size_t size)
|
||||
{
|
||||
const size_t subDivSize = ((size_t)1 << baselib::Algorithm::HighestBitNonZero(size)) >> m_LinearSubdivisionsLog2;
|
||||
return (size - 1 & ~(subDivSize - 1)) + subDivSize;
|
||||
}
|
||||
|
||||
template<int value = ((m_AllocatorCount == 1 && linear_subdivisions == 1) ? 1 : 2), typename std::enable_if<(value == 1), int>::type = 0>
|
||||
BlockAllocator& getAllocator(size_t)
|
||||
{
|
||||
return m_Allocators[0][0];
|
||||
}
|
||||
|
||||
template<int value = ((m_AllocatorCount != 1 && linear_subdivisions == 1) ? 3 : 4), typename std::enable_if<(value == 3), int>::type = 0>
|
||||
BlockAllocator& getAllocator(const size_t size)
|
||||
{
|
||||
return m_Allocators[AllocatorSizeLog2(size - 1) - m_AllocatorBaseOffsetLog2][0];
|
||||
}
|
||||
|
||||
template<int value = ((m_AllocatorCount == 1 && linear_subdivisions != 1) ? 5 : 6), typename std::enable_if<(value == 5), int>::type = 0>
|
||||
BlockAllocator& getAllocator(size_t size)
|
||||
{
|
||||
--size;
|
||||
return m_Allocators[0][LinearAllocatorSizeLog2(size, AllocatorSizeLog2(size))];
|
||||
}
|
||||
|
||||
template<int value = ((m_AllocatorCount != 1 && linear_subdivisions != 1) ? 7 : 8), typename std::enable_if<(value == 7), int>::type = 0>
|
||||
BlockAllocator& getAllocator(size_t size)
|
||||
{
|
||||
--size;
|
||||
const size_t sizeLog2 = AllocatorSizeLog2(size);
|
||||
return m_Allocators[sizeLog2 - m_AllocatorBaseOffsetLog2][LinearAllocatorSizeLog2(size, sizeLog2)];
|
||||
}
|
||||
|
||||
template<typename T> struct has_deallocate_all
|
||||
{
|
||||
template<typename U, void (U::*)()> struct Check;
|
||||
template<typename U> static constexpr bool test(Check<U, &U::deallocate_all> *) { return true; }
|
||||
template<typename U> static constexpr bool test(...) { return false; }
|
||||
static constexpr bool value = test<T>(nullptr);
|
||||
};
|
||||
|
||||
template<bool value = has_deallocate_all<BaseAllocator>::value, typename std::enable_if<(value), int>::type = 0>
|
||||
void DeallocateSegmentsImpl()
|
||||
{
|
||||
BaseAllocator::deallocate_all();
|
||||
}
|
||||
|
||||
template<bool value = has_deallocate_all<BaseAllocator>::value, typename std::enable_if<(!value), int>::type = 0>
|
||||
void DeallocateSegmentsImpl()
|
||||
{
|
||||
for (auto& pow2Allocators : m_Allocators)
|
||||
for (auto& blockAllocator : pow2Allocators)
|
||||
blockAllocator.deallocate_segments(static_cast<BaseAllocator&>(*this));
|
||||
}
|
||||
|
||||
template<bool use_try_allocate>
|
||||
void* ReallocateImpl(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
if (ptr == nullptr)
|
||||
return use_try_allocate ? try_allocate(new_size) : allocate(new_size);
|
||||
|
||||
BlockAllocator& oldAllocator = getAllocator(old_size);
|
||||
BlockAllocator& newAllocator = getAllocator(new_size);
|
||||
if (&oldAllocator == &newAllocator)
|
||||
return ptr;
|
||||
|
||||
void* newPtr = newAllocator.allocate();
|
||||
if ((!use_try_allocate) && (newPtr == nullptr))
|
||||
newPtr = allocate(new_size);
|
||||
|
||||
if (newPtr)
|
||||
{
|
||||
std::memcpy(newPtr, ptr, std::min(new_size, old_size));
|
||||
oldAllocator.deallocate(ptr);
|
||||
}
|
||||
return newPtr;
|
||||
}
|
||||
|
||||
BlockAllocator m_Allocators[m_AllocatorCount][linear_subdivisions];
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
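The tlsf_allocator above is parameterized on a BaseAllocator. From the calls made in this file, that type needs an `alignment` constant plus allocate(size), deallocate(ptr, size) and optimal_size(size). The sketch below shows a minimal malloc-backed base allocator inferred from those calls; MallocBaseAllocator and TlsfExample are illustrative names, not part of baselib, and the example assumes malloc returns at least 16-byte aligned blocks (typical on 64-bit toolchains).

#include <cstdlib>
#include <cstdint>

struct MallocBaseAllocator
{
    static constexpr uint32_t alignment = 16;                 // must be a power of two

    void* allocate(size_t size) { return std::malloc(size); }
    bool deallocate(void* ptr, size_t /*size*/) { std::free(ptr); return true; }
    size_t optimal_size(size_t size) const { return size; }   // malloc gives no padding hint
};

void TlsfExample()
{
    // Pools for blocks from 16 bytes to 4 KiB, each power-of-two bucket split into 4 linear subdivisions.
    using SmallObjectPool = baselib::detail::tlsf_allocator<16, 4096, 4, MallocBaseAllocator>;
    SmallObjectPool pool;
    void* p = pool.allocate(72);   // grows capacity on demand, then pops a free block from the matching bucket
    if (p)
        pool.deallocate(p, 72);    // returns the block to the same bucket
}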
172
Libraries/external/baselib/Include/Cpp/Lock.h
vendored
Normal file
@@ -0,0 +1,172 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_Lock.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, a lock or mutex (from mutual exclusion) is a synchronization mechanism for enforcing limits on access to a resource in an environment
|
||||
// where there are many threads of execution. A lock is designed to enforce a mutual exclusion concurrency control policy.
|
||||
//
|
||||
// "Lock (computer science)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Lock_(computer_science)&oldid=875674239
|
||||
class Lock
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
Lock(const Lock& other) = delete;
|
||||
Lock& operator=(const Lock& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
Lock(Lock&& other) = delete;
|
||||
Lock& operator=(Lock&& other) = delete;
|
||||
|
||||
// Creates a lock synchronization primitive.
|
||||
// If there are not enough system resources to create a lock, process abort is triggered.
|
||||
Lock() : m_LockData(Baselib_Lock_Create())
|
||||
{
|
||||
}
|
||||
|
||||
// Reclaim resources and memory held by lock.
|
||||
// If threads are waiting on the lock, calling free may trigger an assert and may cause process abort.
|
||||
~Lock()
|
||||
{
|
||||
Baselib_Lock_Free(&m_LockData);
|
||||
}
|
||||
|
||||
// Acquire lock.
|
||||
//
|
||||
// If lock is held, either by this or another thread, then the function waits for the lock to be released.
|
||||
//
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Acquire()
|
||||
{
|
||||
return Baselib_Lock_Acquire(&m_LockData);
|
||||
}
|
||||
|
||||
// Try to acquire lock and return immediately.
|
||||
// If lock is held, either by this or another thread, then the lock is not acquired and the function returns false.
|
||||
//
|
||||
// When a lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
FORCE_INLINE bool TryAcquire()
|
||||
{
|
||||
return Baselib_Lock_TryAcquire(&m_LockData);
|
||||
}
|
||||
|
||||
// Try to acquire lock.
|
||||
// If lock is held, either by this or another thread, then the function waits for timeoutInMilliseconds for the lock to be released.
|
||||
//
|
||||
// When a lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TryTimedAcquire with a zero timeout differs from TryAcquire() in that TryAcquire() is guaranteed to be a user space operation
// while TryTimedAcquire with a zero timeout may enter the kernel and cause a context switch.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
FORCE_INLINE bool TryTimedAcquire(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
return Baselib_Lock_TryTimedAcquire(&m_LockData, timeoutInMilliseconds.count());
|
||||
}
|
||||
|
||||
// Release lock and make it available to other threads.
|
||||
//
|
||||
// This function can be called from any thread, not only the thread that acquired the lock.
|
||||
// If no lock was previously held calling this function results in a no-op.
|
||||
//
|
||||
// When the lock is released this function is guaranteed to emit a release barrier.
|
||||
FORCE_INLINE void Release()
|
||||
{
|
||||
return Baselib_Lock_Release(&m_LockData);
|
||||
}
|
||||
|
||||
// Acquire lock and invoke user defined function.
|
||||
// If lock is held, either by this or another thread, then the function waits for the lock to be released.
|
||||
//
|
||||
// When a lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Example usage:
|
||||
// lock.AcquireScoped([] {
|
||||
// enteredCriticalSection++;
|
||||
// });
|
||||
template<class FunctionType>
|
||||
FORCE_INLINE void AcquireScoped(const FunctionType& func)
|
||||
{
|
||||
ReleaseOnDestroy releaseScope(*this);
|
||||
Acquire();
|
||||
func();
|
||||
}
|
||||
|
||||
// Try to acquire lock and invoke user defined function.
|
||||
// If lock is held, either by this or another thread, then the lock is not acquired and the function returns false.
|
||||
// On failure to obtain lock the user defined function is not invoked.
|
||||
//
|
||||
// When a lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Example usage:
|
||||
// lock.TryAcquireScoped([] {
|
||||
// enteredCriticalSection++;
|
||||
// });
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
template<class FunctionType>
|
||||
FORCE_INLINE bool TryAcquireScoped(const FunctionType& func)
|
||||
{
|
||||
if (TryAcquire())
|
||||
{
|
||||
ReleaseOnDestroy releaseScope(*this);
|
||||
func();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to acquire lock and invoke user defined function.
|
||||
// If lock is held, either by this or another thread, then the function waits for timeoutInMilliseconds for the lock to be released.
|
||||
// On failure to obtain lock the user defined function is not invoked.
|
||||
//
|
||||
// When a lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Example usage:
|
||||
// bool lockAcquired = lock.TryTimedAcquireScoped(std::chrono::minutes(1), [] {
|
||||
// enteredCriticalSection++;
|
||||
// });
|
||||
// assert(lockAcquired);
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
template<class FunctionType>
|
||||
FORCE_INLINE bool TryTimedAcquireScoped(const timeout_ms timeoutInMilliseconds, const FunctionType& func)
|
||||
{
|
||||
if (TryTimedAcquire(timeoutInMilliseconds))
|
||||
{
|
||||
ReleaseOnDestroy releaseScope(*this);
|
||||
func();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
class ReleaseOnDestroy
|
||||
{
|
||||
public:
|
||||
FORCE_INLINE ReleaseOnDestroy(Lock& lockReference) : m_LockReference(lockReference) {}
|
||||
FORCE_INLINE ~ReleaseOnDestroy() { m_LockReference.Release(); }
|
||||
private:
|
||||
Lock& m_LockReference;
|
||||
};
|
||||
|
||||
Baselib_Lock m_LockData;
|
||||
};
|
||||
}
|
||||
}
|
||||
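A small sketch showing the Lock above guarding shared state across two threads, with the scoped and the manual Acquire/Release forms side by side; include path is an assumption.

#include "Libraries/external/baselib/Include/Cpp/Lock.h"  // assumed include path
#include <thread>

void LockExample()
{
    baselib::Lock lock;
    int sharedCounter = 0;

    std::thread worker([&] {
        lock.AcquireScoped([&] { ++sharedCounter; });  // scoped variant releases when the lambda returns
    });

    lock.Acquire();                                    // manual pairing of Acquire/Release
    ++sharedCounter;
    lock.Release();

    worker.join();
}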
186
Libraries/external/baselib/Include/Cpp/ReentrantLock.h
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_ReentrantLock.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, the reentrant mutex (recursive mutex, recursive lock) is a particular type of mutual exclusion (mutex) device that may be locked multiple
|
||||
// times by the same process/thread, without causing a deadlock.
|
||||
|
||||
// While any attempt to perform the "lock" operation on an ordinary mutex (lock) would either fail or block when the mutex is already locked, on a recursive
|
||||
// mutex this operation will succeed if and only if the locking thread is the one that already holds the lock. Typically, a recursive mutex tracks the number
|
||||
// of times it has been locked, and requires equally many unlock operations to be performed before other threads may lock it.
|
||||
//
|
||||
// "Reentrant mutex", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Reentrant_mutex&oldid=818566928
|
||||
//
|
||||
// For optimal performance, baselib::ReentrantLock should be stored at a cache aligned memory location.
|
||||
class ReentrantLock
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
ReentrantLock(const ReentrantLock& other) = delete;
|
||||
ReentrantLock& operator=(const ReentrantLock& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
ReentrantLock(ReentrantLock&& other) = delete;
|
||||
ReentrantLock& operator=(ReentrantLock&& other) = delete;
|
||||
|
||||
// Creates a reentrant lock synchronization primitive.
|
||||
// If there are not enough system resources to create a lock, process abort is triggered.
|
||||
ReentrantLock() : m_ReentrantLockData(Baselib_ReentrantLock_Create())
|
||||
{
|
||||
}
|
||||
|
||||
// Reclaim resources and memory held by lock.
|
||||
//
|
||||
// If threads are waiting on the lock, calling free may trigger an assert and may cause process abort.
|
||||
// Calling this function with a nullptr results in a no-op.
|
||||
~ReentrantLock()
|
||||
{
|
||||
Baselib_ReentrantLock_Free(&m_ReentrantLockData);
|
||||
}
|
||||
|
||||
// Acquire lock.
|
||||
//
|
||||
// If lock is already acquired by the current thread this function increases the lock count so that an equal number of calls to Baselib_ReentrantLock_Release needs
|
||||
// to be made before the lock is released.
|
||||
// If lock is held by another thread, this function waits for the lock to be released.
|
||||
//
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Acquire()
|
||||
{
|
||||
return Baselib_ReentrantLock_Acquire(&m_ReentrantLockData);
|
||||
}
|
||||
|
||||
// Try to acquire lock and return immediately.
|
||||
// If lock is already acquired by the current thread this function increases the lock count so that an equal number of calls to Baselib_ReentrantLock_Release needs
|
||||
// to be made before the lock is released.
|
||||
//
|
||||
// When lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
FORCE_INLINE bool TryAcquire()
|
||||
{
|
||||
return Baselib_ReentrantLock_TryAcquire(&m_ReentrantLockData);
|
||||
}
|
||||
|
||||
// Try to acquire lock.
|
||||
// If the lock is already acquired by the current thread, this function increases the lock count so that an equal number of calls to Baselib_ReentrantLock_Release
// need to be made before the lock is released.
// If the lock is held by another thread, this function waits up to timeoutInMilliseconds for the lock to be released.
|
||||
//
|
||||
// When lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TryTimedAcquire with a zero timeout differs from TryAcquire() in that TryAcquire() is guaranteed to be a user space operation
// while TryTimedAcquire with a zero timeout may enter the kernel and cause a context switch.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
FORCE_INLINE bool TryTimedAcquire(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
return Baselib_ReentrantLock_TryTimedAcquire(&m_ReentrantLockData, timeoutInMilliseconds.count());
|
||||
}
|
||||
|
||||
// Release lock.
|
||||
// If the lock count is still higher than zero after the release operation, the lock remains in a locked state.
// If the lock count reaches zero, the lock is unlocked and made available to other threads.
|
||||
//
|
||||
// When the lock is released this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// Calling this function from a thread that doesn't own the lock triggers an assert in debug and causes undefined behavior in release builds.
|
||||
FORCE_INLINE void Release()
|
||||
{
|
||||
return Baselib_ReentrantLock_Release(&m_ReentrantLockData);
|
||||
}
|
||||
|
||||
// Acquire lock and invoke user defined function.
|
||||
// If the lock is held by another thread, this function waits for the lock to be released.
|
||||
//
|
||||
// When a lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Example usage:
|
||||
// lock.AcquireScoped([] {
|
||||
// enteredCriticalSection++;
|
||||
// });
|
||||
template<class FunctionType>
|
||||
FORCE_INLINE void AcquireScoped(const FunctionType& func)
|
||||
{
|
||||
ReleaseOnDestroy releaseScope(*this);
|
||||
Acquire();
|
||||
func();
|
||||
}
|
||||
|
||||
// Try to acquire lock and invoke user defined function.
// If the lock is held by another thread, this function does not wait for it to be released.
// On failure to obtain the lock the user defined function is not invoked.
|
||||
//
|
||||
// When lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Example usage:
|
||||
// lock.TryAcquireScoped([] {
|
||||
// enteredCriticalSection++;
|
||||
// });
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
template<class FunctionType>
|
||||
FORCE_INLINE bool TryAcquireScoped(const FunctionType& func)
|
||||
{
|
||||
if (TryAcquire())
|
||||
{
|
||||
ReleaseOnDestroy releaseScope(*this);
|
||||
func();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to acquire lock and invoke user defined function.
// If the lock is held by another thread, this function waits up to timeoutInMilliseconds for the lock to be released.
// On failure to obtain the lock the user defined function is not invoked.
|
||||
//
|
||||
// When lock is acquired this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Example usage:
|
||||
// bool lockAcquired = lock.TryTimedAcquireScoped(std::chrono::minutes(1), [] {
|
||||
// enteredCriticalSection++;
|
||||
// });
|
||||
// assert(lockAcquired);
|
||||
//
|
||||
// Return: true if lock was acquired.
|
||||
template<class FunctionType>
|
||||
FORCE_INLINE bool TryTimedAcquireScoped(const timeout_ms timeoutInMilliseconds, const FunctionType& func)
|
||||
{
|
||||
if (TryTimedAcquire(timeoutInMilliseconds))
|
||||
{
|
||||
ReleaseOnDestroy releaseScope(*this);
|
||||
func();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
class ReleaseOnDestroy
|
||||
{
|
||||
public:
|
||||
FORCE_INLINE ReleaseOnDestroy(ReentrantLock& lockReference) : m_LockReference(lockReference) {}
|
||||
FORCE_INLINE ~ReleaseOnDestroy() { m_LockReference.Release(); }
|
||||
private:
|
||||
ReentrantLock& m_LockReference;
|
||||
};
|
||||
|
||||
Baselib_ReentrantLock m_ReentrantLockData;
|
||||
};
|
||||
}
|
||||
}
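A brief usage sketch of the class above (caller-side code, not part of the vendored header; g_LogLock and the Write/Flush functions are hypothetical): the same thread can nest Acquire calls, and AcquireScoped pairs the acquire with the matching Release automatically.

static baselib::ReentrantLock g_LogLock;

void Write(const char* msg)
{
    g_LogLock.AcquireScoped([&] {
        // ... append msg to a shared buffer ...
    });
}

void Flush()
{
    g_LogLock.Acquire();
    Write("flush marker");   // re-entrant: acquiring again on the same thread does not deadlock
    g_LogLock.Release();
}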
|
||||
115
Libraries/external/baselib/Include/Cpp/Semaphore.h
vendored
Normal file
@@ -0,0 +1,115 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_Semaphore.h"
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, a semaphore is a variable or abstract data type used to control access to a common resource by multiple processes in a concurrent
|
||||
// system such as a multitasking operating system. A semaphore is simply a variable. This variable is used to solve critical section problems and to achieve
|
||||
// process synchronization in the multi processing environment. A trivial semaphore is a plain variable that is changed (for example, incremented or
|
||||
// decremented, or toggled) depending on programmer-defined conditions.
|
||||
//
|
||||
// A useful way to think of a semaphore as used in the real-world system is as a record of how many units of a particular resource are available, coupled with
|
||||
// operations to adjust that record safely (i.e. to avoid race conditions) as units are required or become free, and, if necessary, wait until a unit of the
|
||||
// resource becomes available.
|
||||
//
|
||||
// "Semaphore (programming)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Semaphore_(programming)&oldid=872408126
|
||||
//
|
||||
// For optimal performance, baselib::Semaphore should be stored at a cache aligned memory location.
|
||||
class Semaphore
|
||||
{
|
||||
public:
|
||||
// non-copyable
|
||||
Semaphore(const Semaphore& other) = delete;
|
||||
Semaphore& operator=(const Semaphore& other) = delete;
|
||||
|
||||
// non-movable (strictly speaking not needed but listed to signal intent)
|
||||
Semaphore(Semaphore&& other) = delete;
|
||||
Semaphore& operator=(Semaphore&& other) = delete;
|
||||
|
||||
// This is the max number of tokens guaranteed to be held by the semaphore at
|
||||
// any given point in time. Tokens submitted that exceed this value may silently
|
||||
// be discarded.
|
||||
enum { MaxGuaranteedCount = Baselib_Semaphore_MaxGuaranteedCount };
|
||||
|
||||
// Creates a counting semaphore synchronization primitive.
|
||||
// If there are not enough system resources to create a semaphore, process abort is triggered.
|
||||
Semaphore() : m_SemaphoreData(Baselib_Semaphore_Create())
|
||||
{
|
||||
}
|
||||
|
||||
// Reclaim resources and memory held by the semaphore.
|
||||
//
|
||||
// If threads are waiting on the semaphore, destructor will trigger an assert and may cause process abort.
|
||||
~Semaphore()
|
||||
{
|
||||
Baselib_Semaphore_Free(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
// Wait for semaphore token to become available
|
||||
//
|
||||
// This function is guaranteed to emit an acquire barrier.
|
||||
inline void Acquire()
|
||||
{
|
||||
return Baselib_Semaphore_Acquire(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
// Try to consume a token and return immediately.
|
||||
//
|
||||
// When successful this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// Return: true if token was consumed. false if not.
|
||||
inline bool TryAcquire()
|
||||
{
|
||||
return Baselib_Semaphore_TryAcquire(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
// Wait for semaphore token to become available
|
||||
//
|
||||
// When successful this function is guaranteed to emit an acquire barrier.
|
||||
//
|
||||
// TryTimedAcquire with a zero timeout differs from TryAcquire() in that TryAcquire() is guaranteed to be a user space operation
// while TryTimedAcquire with a zero timeout may enter the kernel and cause a context switch.
|
||||
//
|
||||
// Timeout passed to this function may be subject to system clock resolution.
|
||||
// If the system clock has a resolution of e.g. 16ms that means this function may exit with a timeout error 16ms earlier than originally scheduled.
|
||||
//
|
||||
// Arguments:
|
||||
// - timeout: Time to wait for token to become available.
|
||||
//
|
||||
// Return: true if token was consumed. false if timeout was reached.
|
||||
inline bool TryTimedAcquire(const timeout_ms timeoutInMilliseconds)
|
||||
{
|
||||
return Baselib_Semaphore_TryTimedAcquire(&m_SemaphoreData, timeoutInMilliseconds.count());
|
||||
}
|
||||
|
||||
// Submit tokens to the semaphore.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// Increase the number of available tokens on the semaphore by `count`. Any waiting threads will be notified there are new tokens available.
|
||||
// If the token count reaches `Baselib_Semaphore_MaxGuaranteedCount` this function may silently discard any overflow.
|
||||
inline void Release(uint16_t count)
|
||||
{
|
||||
return Baselib_Semaphore_Release(&m_SemaphoreData, count);
|
||||
}
|
||||
|
||||
// Sets the semaphore token count to zero and release all waiting threads.
|
||||
//
|
||||
// When successful this function is guaranteed to emit a release barrier.
|
||||
//
|
||||
// Return: number of released threads.
|
||||
inline uint32_t ResetAndReleaseWaitingThreads()
|
||||
{
|
||||
return Baselib_Semaphore_ResetAndReleaseWaitingThreads(&m_SemaphoreData);
|
||||
}
|
||||
|
||||
private:
|
||||
Baselib_Semaphore m_SemaphoreData;
|
||||
};
|
||||
}
|
||||
}
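A brief usage sketch (caller-side code, not part of the vendored header; workAvailable and the two functions are hypothetical) showing the typical producer/consumer token pattern:

static baselib::Semaphore workAvailable;

// Producer thread: publish one unit of work, then wake a consumer.
void PublishWork()
{
    // ... enqueue the work item somewhere ...
    workAvailable.Release(1);
}

// Consumer thread: block until at least one token (work item) is available.
void ConsumeWork()
{
    workAvailable.Acquire();
    // ... dequeue and process one work item ...
}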
|
||||
39
Libraries/external/baselib/Include/Cpp/Stopwatch.h
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include "Time.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// Stopwatch
|
||||
// Simplistic stopwatch tool to take accurate time measurements using Baselib_Timer
|
||||
//
|
||||
// Usage example:
|
||||
// auto watch = Stopwatch::StartNew();
|
||||
// HeavyOperation();
|
||||
// printf("Time passed: %fs", watch.GetElapsedTime().ToSeconds());
|
||||
class Stopwatch
|
||||
{
|
||||
public:
|
||||
static Stopwatch StartNew() { return Stopwatch(); }
|
||||
|
||||
high_precision_clock::duration GetElapsedTime() const
|
||||
{
|
||||
return high_precision_clock::duration_from_ticks(high_precision_clock::now_in_ticks() - m_StartTime);
|
||||
}
|
||||
|
||||
high_precision_clock::duration Restart()
|
||||
{
|
||||
high_precision_clock::duration elapsed = GetElapsedTime();
|
||||
m_StartTime = high_precision_clock::now_in_ticks();
|
||||
return elapsed;
|
||||
}
|
||||
|
||||
private:
|
||||
Stopwatch() : m_StartTime(high_precision_clock::now_in_ticks()) {}
|
||||
|
||||
Baselib_Timer_Ticks m_StartTime;
|
||||
};
|
||||
}
|
||||
}
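A brief caller-side sketch (not part of the vendored header; HeavyOperation is a hypothetical workload) showing Restart for repeated measurements. Since the duration type is a std::chrono duration in double nanoseconds, seconds are obtained by dividing count() by 1e9:

#include <cstdio>

void MeasureFrames()
{
    auto watch = baselib::Stopwatch::StartNew();
    for (int frame = 0; frame < 3; ++frame)
    {
        HeavyOperation();
        auto frameTime = watch.Restart();   // elapsed time since StartNew/previous Restart
        printf("frame %d took %fs\n", frame, frameTime.count() / 1e9);
    }
}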
|
||||
135
Libraries/external/baselib/Include/Cpp/Thread.h
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_Thread.h"
|
||||
#include "Time.h"
|
||||
|
||||
#include <memory>
|
||||
#if !COMPILER_SUPPORTS_GENERIC_LAMBDA_EXPRESSIONS
|
||||
#include <functional>
|
||||
#endif
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
/*
|
||||
This class is not supposed to be used as-is.
Instead, a separate thread class should be created to explicitly define thread lifetime.
This is useful to avoid having timeout constants all over the codebase.
|
||||
|
||||
class ApplicationThread : public baselib::Thread
|
||||
{
|
||||
public:
|
||||
// Expose base class constructors.
|
||||
using baselib::Thread::Thread;
|
||||
|
||||
void Join()
|
||||
{
|
||||
// Thread must join within 10 seconds, or this is an error.
// Use application specific methods to report the error and/or try again.
|
||||
assert(baselib::Thread::TryJoin(10 * 1000) == true);
|
||||
}
|
||||
};
|
||||
|
||||
*/
|
||||
class BASELIB_API Thread
|
||||
{
|
||||
public:
|
||||
// Default constructor does nothing, useful when declaring thread as field in classes/structs
|
||||
Thread() = default;
|
||||
|
||||
// Generic Constructor
|
||||
template<class FunctionType , class ... Args>
|
||||
Thread(FunctionType && f, Args && ... args)
|
||||
{
|
||||
#if COMPILER_SUPPORTS_GENERIC_LAMBDA_EXPRESSIONS
|
||||
// This generates cleaner and nicer-to-debug code
|
||||
auto wrapped = [ = ] {f(args ...);};
|
||||
#else
|
||||
auto wrapped = std::bind(f, args ...);
|
||||
#endif
|
||||
using Container = decltype(wrapped);
|
||||
|
||||
// Small object optimization.
|
||||
constexpr bool smallObject = (sizeof(Container) <= sizeof(void*)) && (alignof(Container) <= alignof(void*));
|
||||
if (smallObject)
|
||||
{
|
||||
union
|
||||
{
|
||||
// buf is sized by sizeof(Container); sizing it by sizeof(void*) would trigger placement new errors
// even if this code path is not executed
|
||||
char buf[sizeof(Container)];
|
||||
void* smallObject;
|
||||
};
|
||||
smallObject = nullptr; // to avoid -Wmaybe-uninitialized
|
||||
// We have to move it to pointer, otherwise wrapped destructor will be called
|
||||
new(buf) Container(std::move(wrapped));
|
||||
|
||||
thread = CreateThread(ThreadProxySmallObject<Container>, smallObject);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::unique_ptr<Container> ptr(new Container(std::move(wrapped)));
|
||||
thread = CreateThread(ThreadProxyHeap<Container>, ptr.get());
|
||||
if (thread)
|
||||
ptr.release();
|
||||
}
|
||||
}
|
||||
|
||||
// Thread has to be joined before destructor is called
|
||||
~Thread();
|
||||
|
||||
// Non-copyable
|
||||
Thread(const Thread&) = delete;
|
||||
Thread& operator=(const Thread&) = delete;
|
||||
|
||||
// Movable
|
||||
Thread(Thread&& other);
|
||||
Thread& operator=(Thread&& other);
|
||||
|
||||
// Return true if threads are supported
|
||||
static bool SupportsThreads();
|
||||
|
||||
// Return true if join succeeded
|
||||
COMPILER_WARN_UNUSED_RESULT bool TryJoin(timeout_ms timeout);
|
||||
|
||||
// Yields execution
|
||||
static inline void YieldExecution()
|
||||
{
|
||||
Baselib_Thread_YieldExecution();
|
||||
}
|
||||
|
||||
// Returns thread id
|
||||
inline Baselib_Thread_Id GetId()
|
||||
{
|
||||
return Baselib_Thread_GetId(thread);
|
||||
}
|
||||
|
||||
// Returns current thread id
|
||||
static inline Baselib_Thread_Id GetCurrentId()
|
||||
{
|
||||
return Baselib_Thread_GetCurrentThreadId();
|
||||
}
|
||||
|
||||
private:
|
||||
Baselib_Thread* thread = nullptr;
|
||||
|
||||
static Baselib_Thread* CreateThread(Baselib_Thread_EntryPointFunction function, void* arg);
|
||||
|
||||
template<class T>
|
||||
static void ThreadProxyHeap(void* data)
|
||||
{
|
||||
std::unique_ptr<T> ptr(reinterpret_cast<T*>(data));
|
||||
(*ptr)();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static void ThreadProxySmallObject(void* data)
|
||||
{
|
||||
T* ptr = reinterpret_cast<T*>(&data);
|
||||
(*ptr)();
|
||||
ptr->~T();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
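A brief caller-side sketch (not part of the vendored header; Worker and RunWorker are hypothetical): the generic constructor forwards its arguments to the entry point, and the thread must be joined before destruction:

static void Worker(int iterations)
{
    for (int i = 0; i < iterations; ++i)
        baselib::Thread::YieldExecution();
}

void RunWorker()
{
    baselib::Thread thread(Worker, 100);
    // The thread must be joined before the destructor runs; a real application would pick a meaningful timeout.
    while (!thread.TryJoin(baselib::timeout_ms(1000)))
    {
    }
}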
|
||||
103
Libraries/external/baselib/Include/Cpp/ThreadLocalStorage.h
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_ThreadLocalStorage.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// Thread Local Storage provides a variable that can be global but have different value in every thread.
|
||||
// For more details see Baselib_ThreadLocalStorage.
|
||||
// On some platforms this might be fiber local storage.
|
||||
//
|
||||
// Example of usage:
|
||||
// static ThreadLocalStorage<int32_t> threadErrorState;
|
||||
template<typename T>
|
||||
class ThreadLocalStorage
|
||||
{
|
||||
public:
|
||||
// by nature of TLS slots, they must be non-copyable, so
|
||||
ThreadLocalStorage(const ThreadLocalStorage & other) = delete;
|
||||
ThreadLocalStorage& operator=(const ThreadLocalStorage & other) = delete;
|
||||
|
||||
ThreadLocalStorage()
|
||||
{
|
||||
static_assert(sizeof(T) <= sizeof(uintptr_t), "Provided type is too large to be stored in ThreadLocalStorage");
|
||||
handle = Baselib_TLS_Alloc();
|
||||
}
|
||||
|
||||
~ThreadLocalStorage()
|
||||
{
|
||||
if (IsValid())
|
||||
{
|
||||
Baselib_TLS_Free(handle);
|
||||
handle = InvalidTLSHandle;
|
||||
}
|
||||
}
|
||||
|
||||
ThreadLocalStorage(ThreadLocalStorage && other)
|
||||
{
|
||||
// ensure that we don't leak local handle
|
||||
if (handle != InvalidTLSHandle)
|
||||
Baselib_TLS_Free(handle);
|
||||
handle = other.handle;
|
||||
other.handle = InvalidTLSHandle;
|
||||
}
|
||||
|
||||
// Check if variable is valid.
|
||||
// The only case in which the variable might be invalid is if it was moved into some other instance.
|
||||
inline bool IsValid() const
|
||||
{
|
||||
return handle != InvalidTLSHandle;
|
||||
}
|
||||
|
||||
// Resets value in all threads.
|
||||
void Reset()
|
||||
{
|
||||
Baselib_TLS_Free(handle);
|
||||
handle = Baselib_TLS_Alloc();
|
||||
}
|
||||
|
||||
inline T operator=(T value)
|
||||
{
|
||||
Baselib_TLS_Set(handle, (uintptr_t)value);
|
||||
return value;
|
||||
}
|
||||
|
||||
inline ThreadLocalStorage<T>& operator=(ThreadLocalStorage&& other)
|
||||
{
|
||||
// swap values
|
||||
Baselib_TLS_Handle t = handle;
|
||||
handle = other.handle;
|
||||
other.handle = t;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline operator T() const
|
||||
{
|
||||
return (T)Baselib_TLS_Get(handle);
|
||||
}
|
||||
|
||||
inline T operator->() const
|
||||
{
|
||||
return (T)Baselib_TLS_Get(handle);
|
||||
}
|
||||
|
||||
inline T operator++()
|
||||
{
|
||||
*this = *this + 1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline T operator--()
|
||||
{
|
||||
*this = *this - 1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
Baselib_TLS_Handle handle = InvalidTLSHandle;
|
||||
static constexpr uintptr_t InvalidTLSHandle = UINTPTR_MAX;
|
||||
};
|
||||
}
|
||||
}
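A brief caller-side sketch (not part of the vendored header; tlsCallCount and the functions are hypothetical): each thread observes and updates its own copy of the value through the assignment, conversion and increment operators:

static baselib::ThreadLocalStorage<uintptr_t> tlsCallCount;

void CountCall()
{
    ++tlsCallCount;                 // increments only the calling thread's value
}

uintptr_t CallsOnThisThread()
{
    return tlsCallCount;            // implicit conversion reads the calling thread's slot
}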
|
||||
50
Libraries/external/baselib/Include/Cpp/Time.h
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_Timer.h"
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
using timeout_ms = std::chrono::duration<uint32_t, std::milli>;
|
||||
using timeout_us = std::chrono::duration<uint64_t, std::micro>;
|
||||
|
||||
struct high_precision_clock
|
||||
{
|
||||
using duration = std::chrono::duration<double, std::nano>;
|
||||
using time_point = std::chrono::time_point<high_precision_clock, duration>;
|
||||
using rep = duration::rep;
|
||||
using period = duration::period;
|
||||
|
||||
static constexpr bool is_steady = true;
|
||||
|
||||
static time_point now()
|
||||
{
|
||||
return time_point_from_ticks(now_in_ticks());
|
||||
}
|
||||
|
||||
static Baselib_Timer_Ticks now_in_ticks()
|
||||
{
|
||||
return Baselib_Timer_GetHighPrecisionTimerTicks();
|
||||
}
|
||||
|
||||
static duration duration_from_ticks(Baselib_Timer_Ticks ticks)
|
||||
{
|
||||
return duration(ticks * Baselib_Timer_TickToNanosecondsConversionFactor);
|
||||
}
|
||||
|
||||
static Baselib_Timer_Ticks ticks_from_duration_roundup(duration d)
|
||||
{
|
||||
double ticks = d.count() / Baselib_Timer_TickToNanosecondsConversionFactor;
|
||||
return (Baselib_Timer_Ticks)std::ceil(ticks);
|
||||
}
|
||||
|
||||
static time_point time_point_from_ticks(Baselib_Timer_Ticks ticks)
|
||||
{
|
||||
return time_point(duration_from_ticks(ticks));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
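A brief caller-side sketch (not part of the vendored header; TimeExample is a placeholder name) of how these aliases interoperate with std::chrono and with raw ticks:

void TimeExample()
{
    using namespace std::chrono;

    // timeout_ms is a plain std::chrono duration, so standard conversions apply.
    baselib::timeout_ms timeout = duration_cast<baselib::timeout_ms>(seconds(2));   // 2000 ms

    // Measure elapsed time in ticks and convert to a double-nanosecond duration.
    Baselib_Timer_Ticks start = baselib::high_precision_clock::now_in_ticks();
    // ... work ...
    auto elapsed = baselib::high_precision_clock::duration_from_ticks(
        baselib::high_precision_clock::now_in_ticks() - start);
    (void)timeout; (void)elapsed;
}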
|
||||
123
Libraries/external/baselib/Include/Cpp/affix_allocator.h
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include "Algorithm.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// Baselib affix allocator implementation providing optional prefix and suffix memory regions in addition to requested size.
|
||||
//
|
||||
// The affix allocator purpose is to provide memory regions directly adjacent to allocated memory of requested size and alignment.
|
||||
// It is not intended to be a turn-key, general purpose solution, but rather act as a template building block for derived allocators which may extend,
|
||||
// add or ignore methods for specific needs.
|
||||
//
|
||||
// Allocation methods allocate, reallocate and deallocate are using the `Allocator` implementation for memory allocation, as are alignment properties.
|
||||
// As a rule of thumb, Allocator method calls may fail depending on their specific implementation.
|
||||
// What (if any) action is to be taken in such cases is intentionally left to be implemented by the derived class.
|
||||
//
|
||||
// No operations, synchronisation or alignment concepts are applied to the prefix or suffix memory.
// Prefix memory address is obtained using the `prefix` function and is always the allocated memory pointer minus prefix_size (ptr - prefix_size).
// Suffix memory address is obtained using the `suffix` function and is always directly adjacent to the end of allocated memory (ptr + size).
|
||||
//
|
||||
// Notes on memory footprint:
|
||||
// Internally allocated memory must be large enough to hold requested allocation size, prefix_size, suffix_size and alignment padding.
|
||||
// The internally allocated size is calculated as follows: size + suffix_size + (prefix_size rounded up to alignment).
|
||||
// If alignment padding is significant, it may be preferable to use a suffix over a prefix to reduce memory footprint.
|
||||
//
|
||||
template<class Allocator, size_t prefix_size, size_t suffix_size>
|
||||
class affix_allocator : protected Allocator
|
||||
{
|
||||
public:
|
||||
// Allocated memory is guaranteed to always be aligned to at least the value of `alignment`.
|
||||
static constexpr uint32_t alignment = Allocator::alignment;
|
||||
|
||||
// Allocates a memory block large enough to hold `size` number of bytes. Zero size is valid.
|
||||
//
|
||||
// \returns Address to memory block of allocated memory.
|
||||
void* allocate(size_t size)
|
||||
{
|
||||
return OffsetPtrChecked(Allocator::allocate(size + m_AffixSize), m_PrefixAlignedSize);
|
||||
}
|
||||
|
||||
// Reallocates previously allocated or reallocated memory block pointer reference `ptr` from `old_size` to `new_size` number of bytes.
|
||||
// Passing `nullptr` in `ptr` yields the same result as calling `allocate`.
|
||||
// If `suffix_size` is non-zero, the suffix memory is moved to the new location.
|
||||
//
|
||||
// \returns Address to memory block of reallocated memory.
|
||||
void* reallocate(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
return ptr == nullptr ? allocate(new_size) : ReallocateImpl(ptr, old_size, new_size);
|
||||
}
|
||||
|
||||
// Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`.
|
||||
// Passing `nullptr` in `ptr` results in a no-op.
|
||||
//
|
||||
// \returns Always returns `true` (see notes on operation failure).
|
||||
bool deallocate(void* ptr, size_t size)
|
||||
{
|
||||
return Allocator::deallocate(OffsetPtr(ptr, -m_PrefixAlignedSize), size + m_AffixSize);
|
||||
}
|
||||
|
||||
// Calculate the optimal allocation size of the `Allocator` allocator given `size`.
|
||||
//
|
||||
// \returns Optimal size of allocations when allocating memory given `size`.
|
||||
constexpr size_t optimal_size(size_t size) const
|
||||
{
|
||||
return Allocator::optimal_size(size);
|
||||
}
|
||||
|
||||
// Get prefix memory block address of allocation pointed to by `ptr`.
|
||||
// Memory must be a valid allocation from `allocate` or `reallocate`, or result is undefined.
|
||||
//
|
||||
// \returns Prefix memory address or nullptr if `prefix_size` is zero.
|
||||
void* prefix(void* ptr) const
|
||||
{
|
||||
return prefix_size == 0 ? nullptr : OffsetPtr(ptr, -static_cast<ptrdiff_t>(prefix_size));
|
||||
}
|
||||
|
||||
// Get suffix memory block address of allocation with `size` pointed to by `ptr`.
|
||||
// Memory must be a valid allocation from `allocate` or `reallocate`, or result is undefined.
|
||||
//
|
||||
// \returns Suffix memory address or nullptr if `suffix_size` is zero.
|
||||
void* suffix(void* ptr, size_t size) const
|
||||
{
|
||||
return suffix_size == 0 ? nullptr : OffsetPtr(ptr, size);
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr size_t AlignSize(size_t size) { return (size + Allocator::alignment - 1) & ~(Allocator::alignment - 1); }
|
||||
|
||||
static FORCE_INLINE constexpr void *OffsetPtrChecked(const void *ptr, size_t offset) { return ptr == nullptr ? nullptr : OffsetPtr(ptr, offset); }
|
||||
static FORCE_INLINE constexpr void *OffsetPtr(const void *ptr, size_t offset)
|
||||
{
|
||||
return reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(ptr) + offset);
|
||||
}
|
||||
|
||||
template<size_t value = suffix_size, typename std::enable_if<value == 0, bool>::type = 0>
|
||||
FORCE_INLINE void* ReallocateImpl(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
return OffsetPtrChecked(Allocator::reallocate(OffsetPtr(ptr, -m_PrefixAlignedSize), old_size + m_PrefixAlignedSize, new_size + m_PrefixAlignedSize), m_PrefixAlignedSize);
|
||||
}
|
||||
|
||||
template<size_t value = suffix_size, typename std::enable_if<value != 0, bool>::type = 0>
|
||||
FORCE_INLINE void* ReallocateImpl(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
uint8_t tmpSuffix[m_SuffixSize];
|
||||
memcpy(tmpSuffix, suffix(ptr, old_size), m_SuffixSize);
|
||||
ptr = Allocator::reallocate(OffsetPtr(ptr, -m_PrefixAlignedSize), old_size + m_AffixSize, new_size + m_AffixSize);
|
||||
if (ptr)
|
||||
{
|
||||
ptr = OffsetPtr(ptr, m_PrefixAlignedSize);
|
||||
memcpy(suffix(ptr, new_size), tmpSuffix, m_SuffixSize);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static constexpr ptrdiff_t m_PrefixAlignedSize = AlignSize(prefix_size);
|
||||
static constexpr ptrdiff_t m_SuffixSize = suffix_size;
|
||||
static constexpr ptrdiff_t m_AffixSize = m_PrefixAlignedSize + m_SuffixSize;
|
||||
};
|
||||
}
|
||||
}
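A brief sketch of a derived allocator (not part of the vendored header; tagged_allocator is hypothetical and assumes baselib::heap_allocator<> as the backing allocator) that stores a 64-bit tag in the prefix region directly in front of each allocation:

class tagged_allocator : public baselib::affix_allocator<baselib::heap_allocator<>, sizeof(uint64_t), 0>
{
public:
    void* allocate_tagged(size_t size, uint64_t tag)
    {
        void* p = allocate(size);
        if (p)
            *static_cast<uint64_t*>(prefix(p)) = tag;   // prefix(p) == p - sizeof(uint64_t)
        return p;
    }

    uint64_t tag_of(void* p) const
    {
        return *static_cast<uint64_t*>(prefix(p));
    }
};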
|
||||
78
Libraries/external/baselib/Include/Cpp/fallback_allocator.h
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// Baselib fallback allocator implementation with baselib allocators method coverage.
|
||||
// If the `Primary` allocator fails to allocate the request, it is passed to the `Fallback` allocator.
|
||||
//
|
||||
// The fallback allocator purpose is to provide a template for implementation using an allocator composition approach.
|
||||
// While providing for the baselib allocators interface(s), it's not intended to be a turn-key, general purpose solution, but rather
|
||||
// act as a template building block for derived allocators which may extend, add or ignore methods for specific needs.
|
||||
//
|
||||
// As a rule of thumb, both Primary and Fallback allocator method calls may fail depending on their specific implementation.
|
||||
// What (if any) action is to be taken in such cases is intentionally left to be implemented by the derived class.
|
||||
//
|
||||
template<class Primary, class Fallback>
|
||||
class fallback_allocator : protected Primary, protected Fallback
|
||||
{
|
||||
public:
|
||||
// Allocations are guaranteed to always be aligned to at least the value of `alignment`
|
||||
// Alignment is the minimal value of Primary and Fallback allocator alignment, which is what can be guaranteed.
|
||||
static constexpr unsigned alignment = (Primary::alignment < Fallback::alignment) ? Primary::alignment : Fallback::alignment;
|
||||
|
||||
// Allocates a memory block large enough to hold `size` number of bytes.
|
||||
//
|
||||
// \returns Address to memory block of allocated memory or nullptr if allocation failed.
|
||||
void* allocate(size_t size)
|
||||
{
|
||||
void *ptr = Primary::allocate(size);
|
||||
if (ptr == nullptr)
|
||||
ptr = Fallback::allocate(size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Reallocates previously allocated or reallocated memory block pointer reference `ptr` from `old_size` to `new_size` number of bytes.
|
||||
// Reallocation will fail if the ownership of the new allocation can't be preserved.
|
||||
//
|
||||
// \returns Address to memory block of reallocated memory or nullptr if reallocation failed.
|
||||
void* reallocate(void* ptr, size_t old_size, size_t new_size)
|
||||
{
|
||||
if (Primary::owns(ptr, old_size))
|
||||
return Primary::reallocate(ptr, old_size, new_size);
|
||||
return Fallback::reallocate(ptr, old_size, new_size);
|
||||
}
|
||||
|
||||
// Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`.
|
||||
//
|
||||
// \returns True if the operation was successful.
|
||||
bool deallocate(void* ptr, size_t size)
|
||||
{
|
||||
if (Primary::owns(ptr, size))
|
||||
return Primary::deallocate(ptr, size);
|
||||
return Fallback::deallocate(ptr, size);
|
||||
}
|
||||
|
||||
// Calculate optimal allocation size of the primary allocator given `size`.
|
||||
//
|
||||
// \returns Optimal size of the primary allocator when allocating memory given `size`.
|
||||
constexpr size_t optimal_size(size_t size) const
|
||||
{
|
||||
return Primary::optimal_size(size);
|
||||
}
|
||||
|
||||
// Checks ownership of the allocation given `ptr` and `size`.
|
||||
// It is implementation defined if either or both of `ptr` and `size` are considered to determine ownership.
|
||||
//
|
||||
// \returns True if the primary allocator owns the allocation.
|
||||
bool owns(const void* ptr, size_t size) const
|
||||
{
|
||||
return Primary::owns(ptr, size);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
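A brief composition sketch (not part of the vendored header). linear_allocator below is a hypothetical fixed-buffer primary written only for this example; baselib::heap_allocator is the fallback. The primary signals failure by returning nullptr, at which point fallback_allocator forwards the request to the heap:

#include <cstring>
#include <cstdint>

struct linear_allocator
{
    static constexpr uint32_t alignment = 8;

    void* allocate(size_t size)
    {
        size = (size + alignment - 1) & ~size_t(alignment - 1);
        if (used + size > sizeof(buffer))
            return nullptr;                              // failure -> fallback takes over
        void* p = buffer + used;
        used += size;
        return p;
    }

    void* reallocate(void* ptr, size_t old_size, size_t new_size)
    {
        void* p = allocate(new_size);
        if (p && ptr)
            std::memcpy(p, ptr, old_size < new_size ? old_size : new_size);
        return p;
    }

    bool deallocate(void*, size_t) { return true; }      // individual frees are no-ops in this sketch

    constexpr size_t optimal_size(size_t size) const { return size; }

    bool owns(const void* ptr, size_t) const
    {
        uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
        uintptr_t b = reinterpret_cast<uintptr_t>(buffer);
        return p >= b && p < b + sizeof(buffer);
    }

    char buffer[4096];
    size_t used = 0;
};

using small_or_heap = baselib::fallback_allocator<linear_allocator, baselib::heap_allocator<>>;

void FallbackExample()
{
    small_or_heap alloc;
    void* small_block = alloc.allocate(64);       // served from the fixed buffer
    void* big_block   = alloc.allocate(100000);   // too large for the buffer -> heap
    alloc.deallocate(big_block, 100000);
    alloc.deallocate(small_block, 64);
}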
|
||||
99
Libraries/external/baselib/Include/Cpp/heap_allocator.h
vendored
Normal file
99
Libraries/external/baselib/Include/Cpp/heap_allocator.h
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
#pragma once
|
||||
|
||||
#include "Internal/heap_allocator.inl.h"
|
||||
#include "Algorithm.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// Heap allocator implementation providing platform dependent system heap allocation.
|
||||
//
|
||||
// Allocations are guaranteed to be aligned to at least the value of `default_alignment`.
|
||||
// For optimal performance, platform aligned allocation calls are only used when `default_alignment` exceeds platform minimum alignment guarantee.
|
||||
// This allocator is a stateless allocator (empty class).
|
||||
//
|
||||
// Notes on operation failure of allocator methods:
|
||||
// Operation failures will currently trigger process abort by the underlying system.
|
||||
// As a result the heap allocator will currently never return `nullptr`/`false` (nor any error state information) to signal failure, as would otherwise be standard behaviour.
|
||||
//
|
||||
template<uint32_t default_alignment = 8>
|
||||
class heap_allocator
|
||||
{
|
||||
using impl = detail::heap_allocator<default_alignment>;
|
||||
static_assert((default_alignment <= impl::max_alignment), "'default_alignment' exceeded max value");
|
||||
static_assert((default_alignment != 0), "'default_alignment' must not be a zero value");
|
||||
static_assert(::baselib::Algorithm::IsPowerOfTwo(default_alignment), "'default_alignment' must be a power of two value");
|
||||
|
||||
public:
|
||||
// Allocated memory is guaranteed to always be aligned to at least the value of `alignment`.
|
||||
static constexpr uint32_t alignment = default_alignment;
|
||||
|
||||
// Typedefs
|
||||
typedef Baselib_ErrorState error_state;
|
||||
|
||||
// Allocates a memory block large enough to hold `size` number of bytes. Zero size is valid.
|
||||
//
|
||||
// \returns Address to memory block of allocated memory.
|
||||
void* allocate(size_t size) const
|
||||
{
|
||||
error_state result = Baselib_ErrorState_Create();
|
||||
return impl::allocate(size, &result);
|
||||
}
|
||||
|
||||
// Allocates a memory block large enough to hold `size` number of bytes. Zero size is valid.
|
||||
//
|
||||
// \returns Address to memory block of allocated memory.
|
||||
void* allocate(size_t size, error_state *error_state_ptr) const
|
||||
{
|
||||
return impl::allocate(size, error_state_ptr);
|
||||
}
|
||||
|
||||
// Reallocates previously allocated or reallocated memory block pointer reference `ptr` from `old_size` to `new_size` number of bytes.
|
||||
// Passing `nullptr` in `ptr` yields the same result as calling `allocate`.
|
||||
//
|
||||
// \returns Address to memory block of reallocated memory.
|
||||
void* reallocate(void* ptr, size_t old_size, size_t new_size) const
|
||||
{
|
||||
error_state result = Baselib_ErrorState_Create();
|
||||
return impl::reallocate(ptr, old_size, new_size, &result);
|
||||
}
|
||||
|
||||
// Reallocates previously allocated or reallocated memory block pointer reference `ptr` from `old_size` to `new_size` number of bytes.
|
||||
// Passing `nullptr` in `ptr` yields the same result as calling `allocate`.
|
||||
//
|
||||
// \returns Address to memory block of reallocated memory.
|
||||
void* reallocate(void* ptr, size_t old_size, size_t new_size, error_state *error_state_ptr) const
|
||||
{
|
||||
return impl::reallocate(ptr, old_size, new_size, error_state_ptr);
|
||||
}
|
||||
|
||||
// Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`.
|
||||
// Passing `nullptr` in `ptr` results in a no-op.
|
||||
//
|
||||
// \returns Always returns `true` (see notes on operation failure).
|
||||
bool deallocate(void* ptr, size_t size) const
|
||||
{
|
||||
error_state result = Baselib_ErrorState_Create();
|
||||
return impl::deallocate(ptr, size, &result);
|
||||
}
|
||||
|
||||
// Deallocates memory block previously allocated or reallocated with `size` pointed to by `ptr`.
|
||||
// Passing `nullptr` in `ptr` results in a no-op.
|
||||
//
|
||||
// \returns Always returns `true` (see notes on operation failure).
|
||||
bool deallocate(void* ptr, size_t size, error_state *error_state_ptr) const
|
||||
{
|
||||
return impl::deallocate(ptr, size, error_state_ptr);
|
||||
}
|
||||
|
||||
// Calculate optimal allocation size given `size`.
|
||||
//
|
||||
// \returns Optimal size when allocating memory given `size`.
|
||||
constexpr size_t optimal_size(size_t size) const
|
||||
{
|
||||
return impl::optimal_size(size);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
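A brief caller-side sketch (not part of the vendored header; HeapAllocatorExample is a placeholder name) of both overload flavours:

void HeapAllocatorExample()
{
    baselib::heap_allocator<16> alloc;             // allocations aligned to at least 16 bytes

    void* p = alloc.allocate(256);
    p = alloc.reallocate(p, 256, 512);
    alloc.deallocate(p, 512);

    // Overloads that surface the error state explicitly.
    Baselib_ErrorState state = Baselib_ErrorState_Create();
    void* q = alloc.allocate(64, &state);
    alloc.deallocate(q, 64, &state);
}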
|
||||
238
Libraries/external/baselib/Include/Cpp/mpmc_fixed_queue.h
vendored
Normal file
@@ -0,0 +1,238 @@
|
||||
#pragma once
|
||||
|
||||
#include "Atomic.h"
|
||||
#include "heap_allocator.h"
|
||||
#include "../C/Baselib_Memory.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, a queue is a collection in which the entities in the collection are kept in order and the principal (or only) operations on the
|
||||
// collection are the addition of entities to the rear terminal position, known as enqueue, and removal of entities from the front terminal position, known
|
||||
// as dequeue. This makes the queue a First-In-First-Out (FIFO) data structure. In a FIFO data structure, the first element added to the queue will be the
|
||||
// first one to be removed. This is equivalent to the requirement that once a new element is added, all elements that were added before have to be removed
|
||||
// before the new element can be removed. Often a peek or front operation is also entered, returning the value of the front element without dequeuing it.
|
||||
// A queue is an example of a linear data structure, or more abstractly a sequential collection.
|
||||
//
|
||||
// "Queue (abstract data type)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Queue_(abstract_data_type)&oldid=878671332
|
||||
//
|
||||
|
||||
// This implementation is a fixed size queue capable of handling multiple concurrent producers and consumers
|
||||
//
|
||||
// Implementation of the queue is lockfree in the sense that one thread always progresses, either by inserting an element or by failing to insert an element.
// Note though, that the data structure in itself is not lock free. In theory, if a thread writing an element gets pre-empted, that thread may block reads
// from proceeding past that point until the writer thread wakes up and completes its operation.
|
||||
template<typename value_type, bool cacheline_aligned = true>
|
||||
class mpmc_fixed_queue
|
||||
{
|
||||
public:
|
||||
// Create a new queue instance capable of holding at most `capacity` number of elements.
|
||||
// `buffer` is an optional user defined memory block large enough to hold the queue data structure.
|
||||
// The size required is obtained by `buffer_size`, alignment requirements by `buffer_alignment`.
|
||||
// If `buffer` is not set (default), the queue will internally allocate memory using baselib heap_allocator.
|
||||
mpmc_fixed_queue(uint32_t capacity, void *buffer = nullptr)
|
||||
: m_SlotAllocator()
|
||||
, m_Slot(static_cast<Slot*>(buffer ? buffer : m_SlotAllocator.allocate(buffer_size(capacity))))
|
||||
, m_UserAllocatedSlots(buffer ? nullptr : m_Slot)
|
||||
, m_NumberOfSlots(capacity ? capacity : 2)
|
||||
, m_Capacity(capacity)
|
||||
, m_ReadPos(0)
|
||||
, m_WritePos(0)
|
||||
{
|
||||
// a zero sized queue uses two slots - the first indicating the queue is empty, the other indicating it is full.
|
||||
if (capacity == 0)
|
||||
{
|
||||
m_Slot[0].checksum.store(WriteableChecksum(0), baselib::memory_order_relaxed);
|
||||
m_Slot[1].checksum.store(ReadableChecksumPrevGen(1), baselib::memory_order_relaxed);
|
||||
m_WritePos = 1; // Point at the second slot which indicates a full queue
|
||||
}
|
||||
else
|
||||
{
|
||||
// fill queue with 'writable slots'
|
||||
for (uint32_t pos = 0; pos < capacity; ++pos)
|
||||
m_Slot[pos].checksum.store(WriteableChecksum(pos), baselib::memory_order_relaxed);
|
||||
}
|
||||
|
||||
baselib::atomic_thread_fence(baselib::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
// Destroy queue, guaranteed to also destroy any elements held by the queue.
|
||||
//
|
||||
// If there are other threads currently accessing the queue behavior is undefined.
|
||||
~mpmc_fixed_queue()
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
const uint32_t pos = m_ReadPos.fetch_add(1, baselib::memory_order_relaxed);
|
||||
Slot& slot = m_Slot[SlotIndex(pos)];
|
||||
if (slot.checksum.load(baselib::memory_order_acquire) != ReadableChecksum(pos))
|
||||
break;
|
||||
slot.value.~value_type();
|
||||
}
|
||||
m_SlotAllocator.deallocate(m_UserAllocatedSlots, buffer_size(static_cast<uint32_t>(m_Capacity)));
|
||||
baselib::atomic_thread_fence(baselib::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
// Try to pop front most element off the queue
|
||||
//
|
||||
// Note that if several push operations are executed in parallel, the one returning first might not have pushed a new head,
// which means that to the user it seems there is a new element in the queue, whereas for the queue the still non-present head will block the removal of any entries.
|
||||
//
|
||||
// \returns true if element was popped, false if queue was empty
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
bool try_pop_front(value_type& value)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
// Load current position and checksum.
|
||||
uint32_t pos = m_ReadPos.load(baselib::memory_order_relaxed);
|
||||
Slot* slot = &m_Slot[SlotIndex(pos)];
|
||||
uint32_t checksum = slot->checksum.load(baselib::memory_order_acquire);
|
||||
|
||||
// As long as it looks like we can read from this slot.
|
||||
while (checksum == ReadableChecksum(pos))
|
||||
{
|
||||
// Try to acquire it and read slot on success.
|
||||
if (m_ReadPos.compare_exchange_weak(pos, pos + 1, baselib::memory_order_relaxed, baselib::memory_order_relaxed))
|
||||
{
|
||||
value = std::move(slot->value);
|
||||
slot->value.~value_type();
|
||||
slot->checksum.store(WriteableChecksumNextGen(pos), baselib::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
// Reload checksum and try again (compare_exchange already reloaded the position)
|
||||
else
|
||||
{
|
||||
slot = &m_Slot[SlotIndex(pos)];
|
||||
checksum = slot->checksum.load(baselib::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
|
||||
// Is queue empty?
|
||||
if (checksum == WriteableChecksum(pos))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Try to append a new element to the end of the queue.
|
||||
//
|
||||
// Note that if several pop operations are executed in parallel, the one returning first might not have popped the head,
// which means that to the user it seems there is a new free slot in the queue, whereas for the queue the still present head will block the addition of new entries.
|
||||
//
|
||||
// \returns true if element was appended, false if queue was full.
|
||||
template<class ... Args>
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
bool try_emplace_back(Args&& ... args)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
// Load current position and checksum.
|
||||
uint32_t pos = m_WritePos.load(baselib::memory_order_relaxed);
|
||||
Slot* slot = &m_Slot[SlotIndex(pos)];
|
||||
uint32_t checksum = slot->checksum.load(baselib::memory_order_acquire);
|
||||
|
||||
// As long as it looks like we can write to this slot.
|
||||
while (checksum == WriteableChecksum(pos))
|
||||
{
|
||||
// Try to acquire it and write slot on success.
|
||||
if (m_WritePos.compare_exchange_weak(pos, pos + 1, baselib::memory_order_relaxed, baselib::memory_order_relaxed))
|
||||
{
|
||||
new(&slot->value) value_type(std::forward<Args>(args)...);
|
||||
slot->checksum.store(ReadableChecksum(pos), baselib::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
// Reload checksum and try again (compare_exchange already reloaded the position)
|
||||
else
|
||||
{
|
||||
slot = &m_Slot[SlotIndex(pos)];
|
||||
checksum = slot->checksum.load(baselib::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
|
||||
// Is queue full?
|
||||
if (checksum == ReadableChecksumPrevGen(pos))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Try to push an element to the end of the queue.
|
||||
//
|
||||
// Note that if several pop operations are executed in parallel, the one returning first might not have popped the head,
// which means that to the user it seems there is a new free slot in the queue, whereas for the queue the still present head will block the addition of new entries.
|
||||
//
|
||||
// \returns true if element was pushed, false if queue was full.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
bool try_push_back(const value_type& value)
|
||||
{
|
||||
return try_emplace_back(value);
|
||||
}
|
||||
|
||||
// Try to push an element to the end of the queue.
|
||||
//
|
||||
// Note that if several pop operations are executed in parallel, the one returning first might not have popped the head,
// which means that to the user it seems there is a new free slot in the queue, whereas for the queue the still present head will block the addition of new entries.
|
||||
//
|
||||
// \returns true if element was pushed, false if queue was full.
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
bool try_push_back(value_type&& value)
|
||||
{
|
||||
return try_emplace_back(std::forward<value_type>(value));
|
||||
}
|
||||
|
||||
// \returns the number of elements that can fit in the queue.
|
||||
size_t capacity() const
|
||||
{
|
||||
return m_Capacity;
|
||||
}
|
||||
|
||||
// Calculate the size in bytes of a memory buffer required to hold `capacity` number of elements.
|
||||
//
|
||||
// \returns Buffer size in bytes.
|
||||
static constexpr size_t buffer_size(uint32_t capacity)
|
||||
{
|
||||
return sizeof(Slot) * (capacity ? capacity : 2);
|
||||
}
|
||||
|
||||
// Calculate the required alignment for a memory buffer containing `value_type` elements.
|
||||
//
|
||||
// \returns Alignment requirement
|
||||
static constexpr size_t buffer_alignment()
|
||||
{
|
||||
return SlotAlignment;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr uint32_t MinTypeAlignment = alignof(value_type) > sizeof(void*) ? alignof(value_type) : sizeof(void*);
|
||||
static constexpr uint32_t SlotAlignment = cacheline_aligned && PLATFORM_CACHE_LINE_SIZE > MinTypeAlignment ? PLATFORM_CACHE_LINE_SIZE : MinTypeAlignment;
|
||||
static constexpr uint32_t ReadableBit = (uint32_t)1 << 31;
|
||||
static constexpr uint32_t WritableMask = ~ReadableBit;
|
||||
static constexpr uint32_t WriteableChecksum(uint32_t pos) { return pos & WritableMask; }
|
||||
static constexpr uint32_t ReadableChecksum(uint32_t pos) { return pos | ReadableBit; }
|
||||
constexpr uint32_t WriteableChecksumNextGen(uint32_t pos) const { return (pos + m_NumberOfSlots) & WritableMask; }
|
||||
constexpr uint32_t ReadableChecksumPrevGen(uint32_t pos) const { return (pos - m_NumberOfSlots) | ReadableBit; }
|
||||
|
||||
constexpr uint32_t SlotIndex(uint32_t pos) const { return pos % m_NumberOfSlots; }
|
||||
|
||||
const baselib::heap_allocator<SlotAlignment> m_SlotAllocator;
|
||||
|
||||
struct alignas(SlotAlignment) Slot
|
||||
{
|
||||
value_type value;
|
||||
baselib::atomic<uint32_t> checksum;
|
||||
};
|
||||
Slot *const m_Slot;
|
||||
void *const m_UserAllocatedSlots;
|
||||
|
||||
// benchmarks show using uint32_t gives ~3x perf boost on 64bit platforms compared to size_t (uint64_t)
|
||||
const uint32_t m_NumberOfSlots;
|
||||
const size_t m_Capacity;
|
||||
|
||||
alignas(PLATFORM_CACHE_LINE_SIZE) baselib::atomic<uint32_t> m_ReadPos;
|
||||
alignas(PLATFORM_CACHE_LINE_SIZE) baselib::atomic<uint32_t> m_WritePos;
|
||||
};
|
||||
}
|
||||
}
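A brief caller-side sketch (not part of the vendored header; FixedQueueExample is a placeholder name): both operations are non-blocking and their return values must be checked:

void FixedQueueExample()
{
    baselib::mpmc_fixed_queue<int> queue(1024);

    // Producer side (any number of threads):
    if (!queue.try_push_back(42))
    {
        // queue was full
    }

    // Consumer side (any number of threads):
    int value;
    if (queue.try_pop_front(value))
    {
        // use value
    }
}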
|
||||
17
Libraries/external/baselib/Include/Cpp/mpmc_node.h
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include "Atomic.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// mpmc_node container node class. All nodes used by mpmc_node containers must derive from this class.
|
||||
// No initialization or other restrictions apply. Inherited class is not accessed by the mpmc_node containers.
|
||||
class mpmc_node
|
||||
{
|
||||
public:
|
||||
baselib::atomic<mpmc_node*> next;
|
||||
};
|
||||
}
|
||||
}
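A brief sketch of a user-defined node type (hypothetical Job payload, not part of the vendored header); the containers only touch the inherited next pointer:

struct JobNode : baselib::mpmc_node
{
    int jobId;
};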
|
||||
239
Libraries/external/baselib/Include/Cpp/mpmc_node_queue.h
vendored
Normal file
@@ -0,0 +1,239 @@
|
||||
#pragma once
|
||||
|
||||
#include "../C/Baselib_Memory.h"
|
||||
#include "../C/Baselib_Atomic_LLSC.h"
|
||||
#include "mpmc_node.h"
|
||||
|
||||
namespace baselib
|
||||
{
|
||||
BASELIB_CPP_INTERFACE
|
||||
{
|
||||
// In computer science, a queue is a collection in which the entities in the collection are kept in order and the principal (or only) operations on the
|
||||
// collection are the addition of entities to the rear terminal position, known as enqueue, and removal of entities from the front terminal position, known
|
||||
// as dequeue. This makes the queue a First-In-First-Out (FIFO) data structure. In a FIFO data structure, the first element added to the queue will be the
|
||||
// first one to be removed. This is equivalent to the requirement that once a new element is added, all elements that were added before have to be removed
|
||||
// before the new element can be removed. Often a peek or front operation is also entered, returning the value of the front element without dequeuing it.
|
||||
// A queue is an example of a linear data structure, or more abstractly a sequential collection.
|
||||
//
|
||||
// "Queue (abstract data type)", Wikipedia: The Free Encyclopedia
|
||||
// https://en.wikipedia.org/w/index.php?title=Queue_(abstract_data_type)&oldid=878671332
|
||||
//
|
||||
|
||||
// This implementation is a lockless node queue capable of handling multiple concurrent producers and consumers
|
||||
//
|
||||
// Node types are required to inherit the mpmc_node class. No data from the inherited class is modified/copied, so no restrictions apply.
|
||||
// The node memory is allocated and destroyed by the user (user owned).
|
||||
// Dequeued nodes may be overwritten/discarded and/or reused.
|
||||
// Dequeued nodes may not be deleted (released from user space memory) while any consumer thread is in the scope of a dequeue call.
|
||||
//
|
||||
// Notes on consumer threads:
// While dequeued nodes may be reused and/or overwritten, they must remain in application readable memory (user space memory) until it can be
// guaranteed that no consumer thread is still processing the node, i.e. not within the scope of a dequeue call.
// Even though the value is ignored (discarded by version check), any consumer thread may still read the node link information.
// Consumer threads are concurrently attempting to dequeue the front in a DCAS loop and the first to succeed will update the queue front; other
// threads continue processing the next front node in the queue. Threads are guaranteed to progress dequeuing nodes even if another consumer
// thread falls asleep during a dequeue, but may fail to dequeue in the combination of the queue getting emptied and the thread resetting the
// state (reloading the back) falling asleep while swapping the back (between two consecutive CAS operations).
// This is usually an extremely infrequent occurrence due to the combination required (it can not happen unless there is exactly one item in the queue).
// Producer threads always progress independently.
|
||||
//
|
||||
// Notes on producer threads:
|
||||
// A producer thread swaps the back and writes the link information in two consecutive atomic operations. If a producer thread falls asleep after the
// swap and before the link information has been written, the consumer thread(s) will not advance past this point since they don't have
// the information yet. Therefore calls from consumer threads will yield null until that particular producer thread wakes back up.
|
||||
//
|
||||
template<typename T>
|
||||
class alignas(sizeof(intptr_t) * 2) mpmc_node_queue
|
||||
{
|
||||
public:
|
||||
// Create a new queue instance.
|
||||
mpmc_node_queue()
|
||||
{
|
||||
m_FrontIntPtr = 1;
|
||||
m_Front.obj.idx = 1;
|
||||
m_Back.obj = 0;
|
||||
atomic_thread_fence(memory_order_seq_cst);
|
||||
}
|
||||
|
||||
// Returns true if queue is empty.
|
||||
bool empty() const
|
||||
{
|
||||
return m_Back.load(memory_order_relaxed) == 0;
|
||||
}
|
||||
|
||||
// Push a node to the back of the queue.
|
||||
void push_back(T* node)
|
||||
{
|
||||
node->next.store(0, memory_order_relaxed);
|
||||
if (T* prev = m_Back.exchange(node, memory_order_release))
|
||||
{
|
||||
prev->next.store(node, memory_order_release);
|
||||
}
|
||||
else
|
||||
{
|
||||
// store the new front (reload) and add one which will put idx back to an
|
||||
// even number, releasing the consumer threads (ptr is always null and idx odd at this point).
|
||||
if (PLATFORM_LLSC_NATIVE_SUPPORT)
|
||||
{
|
||||
m_FrontPair.ptr.store(node, memory_order_release);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_FrontPair.ptr.store(node, memory_order_relaxed);
|
||||
m_FrontPair.idx.fetch_add(1, memory_order_release);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Push a linked list of nodes to the back of the queue.
|
||||
void push_back(T* first_node, T* last_node)
|
||||
{
|
||||
last_node->next.store(0, memory_order_relaxed);
|
||||
if (T* prev = m_Back.exchange(last_node, memory_order_release))
|
||||
{
|
||||
prev->next.store(first_node, memory_order_release);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (PLATFORM_LLSC_NATIVE_SUPPORT)
|
||||
{
|
||||
m_FrontPair.ptr.store(first_node, memory_order_release);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_FrontPair.ptr.store(first_node, memory_order_relaxed);
|
||||
m_FrontPair.idx.fetch_add(1, memory_order_release);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to pop frontmost node of the queue.
|
||||
//
|
||||
// Note that if null is returned, there may still be push operations in progress in a producer thread.
|
||||
// Use the "empty" function to check if a queue is empty.
|
||||
//
|
||||
// \returns front node of the queue or null.
|
||||
T* try_pop_front()
|
||||
{
|
||||
T* node, *next;
|
||||
if (PLATFORM_LLSC_NATIVE_SUPPORT)
|
||||
{
|
||||
intptr_t value;
|
||||
Baselib_atomic_llsc_ptr_acquire_release_v(&m_Front, &node, &next,
|
||||
{
|
||||
// If front bit 0 is set, queue back is being reloaded or queue is empty.
|
||||
value = reinterpret_cast<intptr_t>(node);
|
||||
if (value & 1)
|
||||
{
|
||||
Baselib_atomic_llsc_break();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Fetch next node. If zero, node is the current backnode. LLSC Monitor is internally cleared by subsequent cmpxchg.
|
||||
if (!(next = static_cast<T*>(node->next.obj)))
|
||||
goto BackNode;
|
||||
});
|
||||
return node;
|
||||
|
||||
BackNode:
|
||||
// - filters obsolete nodes
|
||||
// - Exclusive access (re-entrant block)
|
||||
T * front = node;
|
||||
if (!m_FrontPair.ptr.compare_exchange_strong(front, reinterpret_cast<T*>(value | 1), memory_order_acquire, memory_order_relaxed))
|
||||
return 0;
|
||||
|
||||
// - filters incomplete nodes
|
||||
// - check if node is back == retrigger new back
|
||||
if (!m_Back.compare_exchange_strong(front, 0, memory_order_acquire, memory_order_relaxed))
|
||||
{
|
||||
// Back progressed or node is incomplete, restore access and return 0
|
||||
m_FrontIntPtr.fetch_and(~1, memory_order_release);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Success, back == front node, back was set to zero above and index / access is restored by producers, so we return the back node.
|
||||
// The LLSC monitor invalidates any obsolete nodes still in process in other threads.
|
||||
return node;
|
||||
}
|
||||
else
|
||||
{
|
||||
SequencedFrontPtr front, value;
|
||||
|
||||
// Get front node. The DCAS while operation will update front on retry
|
||||
front = m_Front.load(memory_order_acquire);
|
||||
do
|
||||
{
|
||||
// If front idx bit 0 is set, queue back is being reloaded or queue is empty.
|
||||
if (front.idx & 1)
|
||||
return 0;
|
||||
|
||||
// Fetch next node. If zero, node is the current backnode
|
||||
node = front.ptr;
|
||||
if (!(next = static_cast<T*>(node->next.load(memory_order_relaxed))))
|
||||
goto BackNodeDCAS;
|
||||
|
||||
// On success, replace the current with the next node and return node. On fail, retry with updated front.
|
||||
value.ptr = next;
|
||||
value.idx = front.idx + 2;
|
||||
}
|
||||
while (!m_Front.compare_exchange_strong(front, value, memory_order_acquire, memory_order_relaxed));
|
||||
return node;
|
||||
|
||||
BackNodeDCAS:
|
||||
// - filters obsolete nodes
|
||||
// - Exclusive access (re-entrant block)
|
||||
value.ptr = front.ptr;
|
||||
value.idx = front.idx | 1;
|
||||
if (!m_Front.compare_exchange_strong(front, value, memory_order_acquire, memory_order_relaxed))
|
||||
return 0;
|
||||
|
||||
// - filters incomplete nodes
|
||||
// - check if node is back == retrigger new back
|
||||
value.ptr = node;
|
||||
if (!m_Back.compare_exchange_strong(value.ptr, 0, memory_order_acquire, memory_order_relaxed))
|
||||
{
|
||||
// Back progressed or node is incomplete, restore access and return 0
|
||||
m_FrontPair.idx.fetch_and(~1, memory_order_release);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Success, back == front node, back was set to zero above and index / access is restored by producers, so we return the back node.
|
||||
// Version check invalidates any obsolete nodes in still in process in other threads.
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typedef struct
|
||||
{
|
||||
T* ptr;
|
||||
intptr_t idx;
|
||||
} SequencedFrontPtr;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
atomic<T*> ptr;
|
||||
atomic<intptr_t> idx;
|
||||
} FrontPair;
|
||||
|
||||
// Space out atomic members to individual cache lines. Required for native LLSC operations on some architectures, others to avoid false sharing
|
||||
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE];
|
||||
union
|
||||
{
|
||||
atomic<intptr_t> m_FrontIntPtr;
|
||||
FrontPair m_FrontPair;
|
||||
atomic<SequencedFrontPtr> m_Front;
|
||||
};
|
||||
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(SequencedFrontPtr)];
|
||||
atomic<T*> m_Back;
|
||||
char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(T*)];
|
||||
|
||||
// FrontPair is atomic reflections of the SequencedFront fields used for CAS vs DCAS ops. They must match in size and layout.
|
||||
// Do note that we can not check layout (offsetof) as the template class is incomplete!
|
||||
static_assert(sizeof(mpmc_node_queue::m_FrontPair) == sizeof(mpmc_node_queue::m_Front), "SequencedFrontPtr and FrontPair must be of equal size");
|
||||
|
||||
// Verify mpmc_node is base of T
|
||||
static_assert(std::is_base_of<baselib::mpmc_node, T>::value, "Node class/struct used with baselib::mpmc_node_queue must derive from baselib::mpmc_node.");
|
||||
};
|
||||
}
|
||||
}
|
||||
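The sketch below is not part of the vendored header; it shows one way to use the queue above with a hypothetical `Job` node type and wrapper functions. The include paths are assumptions, and only the API declared in this file (`push_back`, `try_pop_front`, `empty`) is used.

// Hypothetical usage example, assuming the baselib Include folder is on the include path.
#include "Cpp/mpmc_node.h"
#include "Cpp/mpmc_node_queue.h"

struct Job : baselib::mpmc_node      // user-owned node; the queue never touches the payload
{
    int payload;
};

static baselib::mpmc_node_queue<Job> g_Jobs;

// Any producer thread: the caller owns the node memory.
void Submit(Job* job)
{
    g_Jobs.push_back(job);
}

// Any consumer thread: null only means "nothing dequeuable right now"; a push may
// still be in flight, so use empty() to tell a truly empty queue apart.
Job* TakeOne()
{
    return g_Jobs.try_pop_front();
}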
187
Libraries/external/baselib/Include/Cpp/mpmc_node_stack.h
vendored
Normal file
@@ -0,0 +1,187 @@
#pragma once

#include "../C/Baselib_Memory.h"
#include "../C/Baselib_Atomic_LLSC.h"
#include "mpmc_node.h"

namespace baselib
{
BASELIB_CPP_INTERFACE
{
    // In computer science, a stack is an abstract data type that serves as a collection of elements, with two principal operations:
    // * push, which adds an element to the collection, and
    // * pop, which removes the most recently added element that was not yet removed.
    // The order in which elements come off a stack gives rise to its alternative name, LIFO (last in, first out).
    // Additionally, a peek operation may give access to the top without modifying the stack.
    // The name "stack" for this type of structure comes from the analogy to a set of physical items stacked on top of each other,
    // which makes it easy to take an item off the top of the stack, while getting to an item deeper in the stack may require taking off multiple other items first.
    // Considered as a linear data structure, or more abstractly a sequential collection, the push and pop operations occur only at one end of the structure,
    // referred to as the top of the stack. This makes it possible to implement a stack as a singly linked list and a pointer to the top element.
    // A stack may be implemented to have a bounded capacity. If the stack is full and does not contain enough space to accept an entity to be pushed,
    // the stack is then considered to be in an overflow state. The pop operation removes an item from the top of the stack.
    //
    // "Stack (abstract data type)", Wikipedia: The Free Encyclopedia
    // https://en.wikipedia.org/wiki/Stack_(abstract_data_type)
    //

    // This implementation is a lockless node stack capable of handling multiple concurrent producers and consumers.
    //
    // Node types are required to inherit the mpmc_node class. No data from the inherited class is modified/copied, so no restrictions apply.
    // The node memory is allocated and destroyed by the user (user owned).
    // Popped nodes may be overwritten/discarded and/or reused.
    // Popped nodes may not be deleted (released from user space memory) while any consumer thread is in the scope of a pop call.
    //
    // Notes on consumer threads:
    // While popped nodes may be reused and/or overwritten, they must remain in application readable memory (user space memory) until it can be
    // guaranteed no consumer thread is still processing the node, i.e. not within the scope of a pop call.
    // Even though the value is ignored (discarded by the version check), any consumer thread may still read the node link information.
    // Consumer threads are concurrently attempting to pop the top of the stack in a DCAS loop and the first to succeed will update the stack top; other
    // threads continue processing the next top node in the stack. Threads are guaranteed to progress to pop nodes even if another consumer
    // thread falls asleep during a pop call.
    //
    template<typename T>
    class alignas(sizeof(intptr_t) * 2) mpmc_node_stack
    {
    public:
        // Create a new stack instance.
        mpmc_node_stack()
        {
            m_Top.obj.ptr = 0;
            m_Top.obj.idx = 0;
            atomic_thread_fence(memory_order_seq_cst);
        }

        // Returns true if the stack is empty.
        bool empty() const
        {
            return m_Top.load(memory_order_relaxed).ptr == 0;
        }

        // Push a node to the top of the stack.
        void push_back(T* node)
        {
            SequencedTopPtr newtop;
            newtop.ptr = node;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &node->next.obj, &newtop, );
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    node->next.store(top.ptr, memory_order_relaxed);
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_release, memory_order_relaxed));
            }
        }

        // Push a linked list of nodes to the top of the stack.
        void push_back(T* first_node, T* last_node)
        {
            SequencedTopPtr newtop;
            newtop.ptr = first_node;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &last_node->next.obj, &newtop, );
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    last_node->next.store(top.ptr, memory_order_relaxed);
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_release, memory_order_relaxed));
            }
        }

        // Try to pop a node from the top of the stack.
        //
        // \returns top node of the stack or null if the stack is empty.
        T* try_pop_back()
        {
            T* node;
            SequencedTopPtr newtop;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &node, &newtop,
                {
                    if (!node)
                    {
                        Baselib_atomic_llsc_break();
                        break;
                    }
                    newtop.ptr = static_cast<T*>(node->next.obj);
                });
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    node = top.ptr;
                    if (!node)
                        break;
                    newtop.ptr = static_cast<T*>(node->next.load(memory_order_relaxed));
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_acquire, memory_order_relaxed));
            }
            return node;
        }

        // Try to pop all nodes from the stack.
        //
        // \returns linked list of nodes or null if the stack is empty.
        T* try_pop_all()
        {
            T* node;
            SequencedTopPtr newtop;
            newtop.ptr = 0;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &node, &newtop,
                {
                    if (!node)
                    {
                        Baselib_atomic_llsc_break();
                        break;
                    }
                });
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    node = top.ptr;
                    if (!node)
                        break;
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_acquire, memory_order_relaxed));
            }
            return node;
        }

    private:
        typedef struct
        {
            T* ptr;
            intptr_t idx;
        } SequencedTopPtr;

        // Space out atomic members to individual cache lines. Required for native LLSC operations on some architectures; on others it avoids false sharing.
        char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE];
        atomic<SequencedTopPtr> m_Top;
        char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(SequencedTopPtr)];

        // Verify mpmc_node is a base of T.
        static_assert(std::is_base_of<baselib::mpmc_node, T>::value, "Node class/struct used with baselib::mpmc_node_stack must derive from baselib::mpmc_node.");
    };
}
}
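A minimal free-list sketch for the stack above (not part of the vendored header), with a hypothetical `Task` node type and `Acquire`/`Release` wrappers; it relies only on the `push_back`/`try_pop_back` API documented in this file and assumes the relevant headers are included.

struct Task : baselib::mpmc_node    // hypothetical node type; payload is never touched by the stack
{
    void (*fn)();
};

static baselib::mpmc_node_stack<Task> g_FreeList;

// Any thread: return a task to the free list.
void Release(Task* t)
{
    g_FreeList.push_back(t);
}

// Any thread: reuse the most recently released task, if any. Per the contract above,
// popped nodes may be reused immediately but must not be freed while other consumers
// may still be inside a pop call.
Task* Acquire()
{
    return g_FreeList.try_pop_back();
}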
17
Libraries/external/baselib/Include/Cpp/mpsc_node.h
vendored
Normal file
@@ -0,0 +1,17 @@
#pragma once

#include "Atomic.h"

namespace baselib
{
BASELIB_CPP_INTERFACE
{
    // mpsc_node container node class. All nodes used by mpsc_node containers must derive from this class.
    // No initialization or other restrictions apply. The inherited class is not accessed by the mpsc_node containers.
    class mpsc_node
    {
    public:
        atomic<mpsc_node*> next;
    };
}
}
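A hypothetical node type for the mpsc containers below; deriving from `baselib::mpsc_node` is the only requirement stated above, so the payload layout is free.

// Hypothetical payload type used in the following sketches.
struct Message : baselib::mpsc_node
{
    int id;
    const char* text;
};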
134
Libraries/external/baselib/Include/Cpp/mpsc_node_queue.h
vendored
Normal file
@@ -0,0 +1,134 @@
#pragma once

#include "../C/Baselib_Memory.h"
#include "mpsc_node.h"

namespace baselib
{
BASELIB_CPP_INTERFACE
{
    // In computer science, a queue is a collection in which the entities in the collection are kept in order and the principal (or only) operations on the
    // collection are the addition of entities to the rear terminal position, known as enqueue, and removal of entities from the front terminal position, known
    // as dequeue. This makes the queue a First-In-First-Out (FIFO) data structure. In a FIFO data structure, the first element added to the queue will be the
    // first one to be removed. This is equivalent to the requirement that once a new element is added, all elements that were added before have to be removed
    // before the new element can be removed. Often a peek or front operation is also entered, returning the value of the front element without dequeuing it.
    // A queue is an example of a linear data structure, or more abstractly a sequential collection.
    //
    // "Queue (abstract data type)", Wikipedia: The Free Encyclopedia
    // https://en.wikipedia.org/w/index.php?title=Queue_(abstract_data_type)&oldid=878671332
    //

    // This implementation is a lockless node queue capable of handling multiple producers and a single consumer (exclusive access).
    //
    // Node types are required to inherit the mpsc_node class. No data from the inherited class is modified/copied, so no restrictions apply.
    // The node memory is allocated and destroyed by the user (user owned).
    // Dequeued nodes may be deleted, overwritten/discarded and/or reused.
    //
    // Notes on consumer threads:
    // Only one consumer thread will exclusively access the front node. Other consumer threads will always progress, either by failing to dequeue or by
    // successfully dequeuing the next node once the current thread opens access. As opposed to the parallel consumer implementation,
    // this is significantly more performant as no DCAS operations/loops are involved, but if the consumer thread with current exclusive access falls asleep
    // when dequeuing, no other threads will successfully dequeue until the thread wakes up.
    // Producer threads always progress independently.
    //
    // Notes on producer threads:
    // A producer thread swaps the back and writes the link information in two consecutive atomic operations. If a producer thread falls asleep after the
    // swap and before the link information has been written, the consumer thread(s) will not advance past this point since it doesn't have
    // the information yet. Therefore the consumer thread's calls will yield null until that particular producer thread wakes back up.
    //
    template<typename T>
    class alignas(sizeof(intptr_t) * 2) mpsc_node_queue
    {
    public:
        // Create a new queue instance.
        mpsc_node_queue()
        {
            m_Front.obj = 0;
            m_Back.obj = 0;
            atomic_thread_fence(memory_order_seq_cst);
        }

        // Returns true if the queue is empty.
        bool empty() const
        {
            return m_Back.load(memory_order_relaxed) == 0;
        }

        // Push a node to the back of the queue.
        void push_back(T* node)
        {
            node->next.store(0, memory_order_relaxed);
            if (T* prev = m_Back.exchange(node, memory_order_release))
                prev->next.store(node, memory_order_release);
            else
                m_Front.store(node, memory_order_release);
        }

        // Push a linked list of nodes to the back of the queue.
        void push_back(T* first_node, T* last_node)
        {
            last_node->next.store(0, memory_order_relaxed);
            if (T* prev = m_Back.exchange(last_node, memory_order_release))
                prev->next.store(first_node, memory_order_release);
            else
                m_Front.store(first_node, memory_order_release);
        }

        // Try to pop the frontmost node of the queue.
        //
        // Note that if null is returned, there may still be push operations in progress in a producer thread.
        // Use the "empty" function to check if a queue is empty.
        //
        // \returns front node of the queue or null.
        T* try_pop_front()
        {
            T* node, *next, *expected;

            // Acquire thread exclusive access to the front node; return 0 if that fails or the queue is empty.
            intptr_t front = m_FrontIntPtr.fetch_or(1, memory_order_acquire);
            if ((front & 1) | !(front >> 1))
                return 0;

            node = (T*)front;
            next = static_cast<T*>(node->next.load(memory_order_relaxed));
            if (!next)
            {
                // Set to zero, assuming we got the head. Exclusive access is maintained as only a producer can write zero.
                m_Front.store(0, memory_order_release);

                // - filters incomplete nodes
                // - check if node is back == retrigger new back
                expected = node;
                if (!m_Back.compare_exchange_strong(expected, 0, memory_order_acquire, memory_order_relaxed))
                {
                    // Back progressed or node is incomplete, reset the front pointer and return 0.
                    m_Front.store(node, memory_order_release);
                    return 0;
                }

                // Successfully got the back, so just return node.
                return node;
            }

            // Store next (clears the block) and return node.
            m_Front.store(next, memory_order_release);
            return node;
        }

    private:
        // Space out atomic members to individual cache lines. Required for native LLSC operations on some architectures; on others it avoids false sharing.
        char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE];
        union
        {
            atomic<T*> m_Front;
            atomic<intptr_t> m_FrontIntPtr;
        };
        char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(T*)];
        atomic<T*> m_Back;
        char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(T*)];

        // Verify mpsc_node is a base of T.
        static_assert(std::is_base_of<baselib::mpsc_node, T>::value, "Node class/struct used with baselib::mpsc_node_queue must derive from baselib::mpsc_node.");
    };
}
}
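A single-consumer drain sketch for the queue above (not part of the vendored header), reusing the hypothetical `Message` node from the earlier sketch; `Post`, `Drain` and `Handle` are made-up names, and the `empty()` check follows the guidance in the pop documentation.

void Handle(Message* m);    // hypothetical handler; the node may be deleted or reused inside

static baselib::mpsc_node_queue<Message> g_Inbox;

// Called from any number of producer threads.
void Post(Message* m)
{
    g_Inbox.push_back(m);
}

// Called from the single consumer thread. try_pop_front() may return null while a
// producer is mid-push, so empty() decides whether the queue is actually drained.
void Drain()
{
    while (!g_Inbox.empty())
    {
        if (Message* m = g_Inbox.try_pop_front())
            Handle(m);
    }
}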
199
Libraries/external/baselib/Include/Cpp/mpsc_node_stack.h
vendored
Normal file
@@ -0,0 +1,199 @@
#pragma once

#include "../C/Baselib_Memory.h"
#include "../C/Baselib_Atomic_LLSC.h"
#include "mpsc_node.h"

namespace baselib
{
BASELIB_CPP_INTERFACE
{
    // In computer science, a stack is an abstract data type that serves as a collection of elements, with two principal operations:
    // * push, which adds an element to the collection, and
    // * pop, which removes the most recently added element that was not yet removed.
    // The order in which elements come off a stack gives rise to its alternative name, LIFO (last in, first out).
    // Additionally, a peek operation may give access to the top without modifying the stack.
    // The name "stack" for this type of structure comes from the analogy to a set of physical items stacked on top of each other,
    // which makes it easy to take an item off the top of the stack, while getting to an item deeper in the stack may require taking off multiple other items first.
    // Considered as a linear data structure, or more abstractly a sequential collection, the push and pop operations occur only at one end of the structure,
    // referred to as the top of the stack. This makes it possible to implement a stack as a singly linked list and a pointer to the top element.
    // A stack may be implemented to have a bounded capacity. If the stack is full and does not contain enough space to accept an entity to be pushed,
    // the stack is then considered to be in an overflow state. The pop operation removes an item from the top of the stack.
    //
    // "Stack (abstract data type)", Wikipedia: The Free Encyclopedia
    // https://en.wikipedia.org/wiki/Stack_(abstract_data_type)
    //

    // This implementation is a lockless node stack capable of handling multiple producers and a single consumer (exclusive access).
    //
    // Node types are required to inherit the mpsc_node class. No data from the inherited class is modified/copied, so no restrictions apply.
    // The node memory is allocated and destroyed by the user (user owned).
    // Popped nodes may be deleted, overwritten/discarded and/or reused.
    //
    // Notes on consumer threads:
    // Only one consumer thread will exclusively access the top node. Other consumer threads will always progress, either by failing to pop or by
    // successfully popping the next node once the current thread opens access, i.e. if the consumer thread with current exclusive access falls asleep
    // when popping, no other threads will successfully pop until the thread wakes up.
    // Producer threads always progress independently.
    //
    template<typename T>
    class alignas(sizeof(intptr_t) * 2) mpsc_node_stack
    {
    public:
        // Create a new stack instance.
        mpsc_node_stack()
        {
            m_Top.obj.ptr = 0;
            m_Top.obj.idx = 0;
            m_ConsumerLock.obj = false;
            atomic_thread_fence(memory_order_seq_cst);
        }

        // Returns true if the stack is empty.
        bool empty() const
        {
            return m_Top.load(memory_order_relaxed).ptr == 0;
        }

        // Push a node to the top of the stack.
        void push_back(T* node)
        {
            SequencedTopPtr newtop;
            newtop.ptr = node;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &node->next.obj, &newtop, );
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    node->next.store(top.ptr, memory_order_relaxed);
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_release, memory_order_relaxed));
            }
        }

        // Push a linked list of nodes to the top of the stack.
        void push_back(T* first_node, T* last_node)
        {
            SequencedTopPtr newtop;
            newtop.ptr = first_node;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &last_node->next.obj, &newtop, );
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    last_node->next.store(top.ptr, memory_order_relaxed);
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_release, memory_order_relaxed));
            }
        }

        // Try to pop a node from the top of the stack.
        //
        // Note that null can be returned if another consumer thread has exclusive read access.
        // Use the "empty" function to check if a stack is empty.
        //
        // \returns top node of the stack or null.
        T* try_pop_back()
        {
            if (m_ConsumerLock.exchange(true, memory_order_acquire))
                return 0;
            T* node;
            SequencedTopPtr newtop;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &node, &newtop,
                {
                    if (!node)
                    {
                        Baselib_atomic_llsc_break();
                        break;
                    }
                    newtop.ptr = static_cast<T*>(node->next.obj);
                });
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    node = top.ptr;
                    if (!node)
                        break;
                    newtop.ptr = static_cast<T*>(node->next.load(memory_order_relaxed));
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_relaxed, memory_order_relaxed));
            }
            m_ConsumerLock.store(false, memory_order_release);
            return node;
        }

        // Try to pop all nodes from the stack.
        //
        // Note that null can be returned if another consumer thread has exclusive read access.
        // Use the "empty" function to check if a stack is empty.
        //
        // \returns linked list of nodes or null.
        T* try_pop_all()
        {
            if (m_ConsumerLock.exchange(true, memory_order_acquire))
                return 0;
            T* node;
            SequencedTopPtr newtop;
            newtop.ptr = 0;
            if (PLATFORM_LLSC_NATIVE_SUPPORT)
            {
                Baselib_atomic_llsc_ptr_acquire_release_v(&m_Top, &node, &newtop,
                {
                    if (!node)
                    {
                        Baselib_atomic_llsc_break();
                        break;
                    }
                });
            }
            else
            {
                SequencedTopPtr top = m_Top.load(memory_order_relaxed);
                do
                {
                    node = top.ptr;
                    if (!node)
                        break;
                    newtop.idx = top.idx + 1;
                }
                while (!m_Top.compare_exchange_strong(top, newtop, memory_order_relaxed, memory_order_relaxed));
            }
            m_ConsumerLock.store(false, memory_order_release);
            return node;
        }

    private:
        typedef struct
        {
            T* ptr;
            intptr_t idx;
        } SequencedTopPtr;

        // Space out atomic members to individual cache lines. Required for native LLSC operations on some architectures; on others it avoids false sharing.
        char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE];
        atomic<SequencedTopPtr> m_Top;
        char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(SequencedTopPtr)];
        atomic<bool> m_ConsumerLock;
        char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(bool)];

        // Verify mpsc_node is a base of T.
        static_assert(std::is_base_of<baselib::mpsc_node, T>::value, "Node class/struct used with baselib::mpsc_node_stack must derive from baselib::mpsc_node.");
    };
}
}
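A drain sketch for the stack above (not part of the vendored header), built only on the documented API and reusing the hypothetical `Message` node; `Flush` and `Handle` are made-up names, and the `empty()` loop accounts for `try_pop_back` returning null while another consumer holds the internal consumer lock.

void Handle(Message* m);    // hypothetical handler; node ownership is back with the caller

static baselib::mpsc_node_stack<Message> g_Pending;

// Intended single-consumer drain: try_pop_back() returns null both when the stack is
// empty and when another consumer currently holds the consumer lock, so empty() is
// what decides whether to keep trying.
void Flush()
{
    while (!g_Pending.empty())
    {
        if (Message* m = g_Pending.try_pop_back())
            Handle(m);
    }
}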
174
Libraries/external/baselib/Include/Cpp/page_allocator.h
vendored
Normal file
@@ -0,0 +1,174 @@
#pragma once

#include "Internal/page_allocator.inl.h"

namespace baselib
{
BASELIB_CPP_INTERFACE
{
    // Page allocator implementation providing platform dependent system page allocation.
    //
    // Allocations are guaranteed to be aligned to at least the value of `default_alignment`.
    // All methods with no page state parameter input will default to `default_page_state` where applicable.
    //
    // Notes on allocation size:
    // All sizes are, as per allocator standards, in bytes. The page allocator internally rounds up sizes to the nearest page size value. Consider this when
    // allocating. Use `optimal_size` to retrieve the number of bytes allocated for a specific size (pass 1 to retrieve the page size value).
    // Large alignments may lead to a significantly higher use of virtual address space than the amount of memory requested.
    // This may cause an aligned page allocation to fail where a less/non-aligned allocation would succeed.
    // Note that this is especially common in 32bit applications, but a platform may impose additional restrictions on the size of its virtual address space.
    // Whether a page allocation is pure virtual address space or already committed memory depends on the platform and the passed page state flag.
    //

    // Page state options
    typedef enum Memory_PageState
    {
        // The pages are in a reserved state and any access will cause a seg-fault/access violation.
        // On some platforms that support this state this may be just a hint to the OS, and there is no guarantee pages in this state behave
        // differently from the `NoAccess` state.
        // The `page_allocator` implementation does a best effort and tries to ensure as best as possible that pages in this state are not committed.
        Memory_PageState_Reserved = detail::Memory_PageState_Reserved,
        // This is a no access page and will cause a seg-fault/access violation when accessed.
        Memory_PageState_NoAccess = detail::Memory_PageState_NoAccess,
        // The memory can only be read.
        Memory_PageState_ReadOnly = detail::Memory_PageState_ReadOnly,
        // The memory can be read and written.
        Memory_PageState_ReadWrite = detail::Memory_PageState_ReadWrite,
        // The memory can be used to execute code and can be read.
        Memory_PageState_ReadOnly_Executable = detail::Memory_PageState_ReadOnly_Executable,
        // The memory can be used to execute code and can be both read and written.
        Memory_PageState_ReadWrite_Executable = detail::Memory_PageState_ReadWrite_Executable,
    } Memory_PageState;

    // Allocator
    template<uint32_t default_alignment = 4096, Memory_PageState default_page_state = Memory_PageState_ReadWrite>
    class page_allocator
    {
        static_assert((default_alignment != 0), "'default_alignment' must not be zero");
        static_assert(::baselib::Algorithm::IsPowerOfTwo(default_alignment), "'default_alignment' must be a power of two value");

        using impl = detail::page_allocator<default_alignment>;
        const impl m_Impl;

    public:
        // Allocated memory is guaranteed to always be aligned to at least the value of `alignment`.
        static constexpr uint32_t alignment = default_alignment;

        // Typedefs
        typedef Baselib_ErrorState error_state;

        // Create a new instance with the system default page size.
        page_allocator() : m_Impl() {}

        // Create a new instance with `page_size` sized pages. The page size is required to be supported by the target system.
        page_allocator(size_t page_size) : m_Impl(page_size)
        {
            BaselibAssert((page_size != 0), "'page_size' must not be a zero value");
            BaselibAssert(::baselib::Algorithm::IsPowerOfTwo(page_size), "'page_size' must be a power of two value");
        }

        // Allocates the number of pages required to hold `size` bytes, with the initial page state set to `state`.
        //
        // \returns Address to memory block of allocated memory or `nullptr` if allocation failed.
        void* allocate(size_t size, Memory_PageState state = default_page_state) const
        {
            error_state result = Baselib_ErrorState_Create();
            return allocate(size, state, &result);
        }

        // Allocates the number of pages required to hold `size` bytes, with the initial page state set to `state`.
        //
        // If the operation failed `error_state_ptr` contains one of the following error codes:
        // - Baselib_ErrorCode_InvalidPageSize: Page size doesn't match any of the available page sizes.
        // - Baselib_ErrorCode_InvalidPageCount: Requested number of pages is zero.
        // - Baselib_ErrorCode_UnsupportedAlignment: Requested alignment is invalid.
        // - Baselib_ErrorCode_UnsupportedPageState: The underlying system doesn't support the requested page state.
        // - Baselib_ErrorCode_OutOfMemory: If there is not enough continuous address space available, or physical memory space when acquiring committed memory.
        //
        // \returns Address to memory block of allocated memory or `nullptr` if allocation failed.
        void* allocate(size_t size, Memory_PageState state, error_state *error_state_ptr) const
        {
            return m_Impl.allocate(size, state, error_state_ptr);
        }

        // Reallocate is not supported by the page allocator. The operation is a no-op.
        //
        // If `error_state_ptr` is passed it contains the following error code:
        // - Baselib_ErrorCode_NotSupported: The operation is not supported by the underlying system.
        //
        // \returns Always returns `nullptr`.
        void* reallocate(void* ptr, size_t old_size, size_t new_size, error_state *error_state_ptr = nullptr) const
        {
            if (error_state_ptr)
                *error_state_ptr |= RaiseError(Baselib_ErrorCode_NotSupported);
            return nullptr;
        }

        // Deallocates a memory block previously allocated or reallocated with `size`, pointed to by `ptr`.
        // A single call of deallocate must encompass the size that was originally allocated with a single call of `allocate`.
        //
        // \returns True if the operation was successful.
        bool deallocate(void* ptr, size_t size) const
        {
            error_state result = Baselib_ErrorState_Create();
            return deallocate(ptr, size, &result);
        }

        // Deallocates a memory block previously allocated or reallocated with `size`, pointed to by `ptr`.
        // A single call of deallocate must encompass the size that was originally allocated with a single call of `allocate`.
        //
        // If the operation failed `error_state_ptr` contains one of the following error codes:
        // - Baselib_ErrorCode_InvalidAddressRange: Address range was detected to not match a valid allocation.
        //   CAUTION: Not all platforms are able to detect this and may either raise an error or cause undefined behavior.
        //   Note to implementors: Raising the error is strongly preferred as it helps identifying issues in user code.
        // - Baselib_ErrorCode_InvalidPageSize: If the page size doesn't match the size of a previous call to `allocate` with the address in `ptr`.
        //
        // \returns True if the operation was successful.
        bool deallocate(void* ptr, size_t size, error_state *error_state_ptr) const
        {
            return m_Impl.deallocate(ptr, size, error_state_ptr);
        }

        // Calculate the optimal allocation size given `size`.
        // The result is the number of bytes allocated for a specific size.
        //
        // \returns Optimal size when allocating memory given `size`.
        constexpr size_t optimal_size(size_t size) const
        {
            return m_Impl.optimal_size(size);
        }

        // Modifies the page state property of an already allocated virtual address in `ptr` of `size` to `state`.
        // It is possible to modify only some of the memory allocated by `allocate`.
        // The address is the address of the first page to modify and so must be aligned to the page size.
        // The size is rounded up to the next multiple of the page size used.
        // Passing `nullptr` or a zero page count results in a no-op.
        //
        // \returns True if the operation was successful.
        bool set_page_state(void* ptr, size_t size, Memory_PageState state) const
        {
            error_state result = Baselib_ErrorState_Create();
            return set_page_state(ptr, size, state, &result);
        }

        // Modifies the page state property of an already allocated virtual address in `ptr` of `size` to `state`.
        // It is possible to modify only some of the memory allocated by `allocate`.
        // The address is the address of the first page to modify and so must be aligned to the page size.
        // The size is rounded up to the next multiple of the page size used.
        // Passing `nullptr` or a zero page count results in a no-op.
        //
        // If the operation failed `error_state_ptr` contains one of the following error codes:
        // - Baselib_ErrorCode_InvalidAddressRange: Address range is not covered by a valid allocation.
        //   Platforms that emulate page allocations (e.g. Emscripten) are not able to present this error and
        //   will pass the function call silently.
        // - Baselib_ErrorCode_InvalidPageSize: If the page size doesn't match the previous allocation in `ptr`.
        // - Baselib_ErrorCode_UnsupportedPageState: The underlying system doesn't support the requested page state.
        //
        // \returns True if the operation was successful.
        bool set_page_state(void* ptr, size_t size, Memory_PageState state, error_state *error_state_ptr) const
        {
            return m_Impl.set_page_state(ptr, size, state, error_state_ptr);
        }
    };
}
}
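A hypothetical end-to-end use of the page allocator above (not part of the vendored header): allocate read-write pages, flip them to read-only after initialization, then release them. The sizes and names are made up, and the header is assumed to be included; only the documented API is used.

void ExamplePages()
{
    baselib::page_allocator<> pages;            // 4096-byte default alignment, ReadWrite default state

    void* block = pages.allocate(10000);        // rounded up internally to whole pages
    if (block)
    {
        // ... fill the memory ...
        pages.set_page_state(block, 10000, baselib::Memory_PageState_ReadOnly);
        pages.deallocate(block, 10000);         // must cover the originally allocated size
    }

    size_t pageSize = pages.optimal_size(1);    // per the notes above, yields the page size
    (void)pageSize;
}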
219
Libraries/external/baselib/Include/Cpp/tlsf_allocator.h
vendored
Normal file
@@ -0,0 +1,219 @@
#pragma once

#include "Internal/tlsf_allocator.inl.h"
#include "heap_allocator.h"

namespace baselib
{
BASELIB_CPP_INTERFACE
{
    // tlsf_allocator (Two-Level Segregated Fit)
    // Lockless, dynamic-sized allocator capable of handling multiple concurrent allocations and deallocations (unless otherwise stated).
    // The cost (in processor instructions) of allocating from the pool is O(1).
    // Allocating from the pool is lockless, except when the capacity is required to increase, in which case the capacity is doubled for the size range
    // of the particular allocation size request (see details below).
    //
    // A strict segregated fit allocation mechanism is applied: a requested size is rounded up to the next allocator-provided size.
    // The granularity of provided sizes (size ranges) is defined by the `min_size`, `max_size` and `linear_subdivisions` parameters provided.
    // `optimal_size` can be called to obtain the actual/best fit size of an allocation for a certain requested size.
    //
    // A two-level segregated fit allocator can be said to have two dimensions, or levels.
    // The first level provides size ranges of pow2 segments.
    // The second level subdivides each first-level pow2 segment's size range by the `linear_subdivisions` parameter value.
    // The size range of a given size can be calculated as follows:
    //
    // int invSizeMask = ((1 << (int)log2(size)) / linear_subdivisions) - 1; // Inverse subdivision mask based on pow2 of `size`, which is effectively the range
    // int lowerBound = (size - 1 & ~invSizeMask) + 1;
    // int upperBound = lowerBound + invSizeMask;
    //
    // As an example, the (internal) size allocated for a requested size of 1500 with linear_subdivisions of 16 is 1536, range (1473-1536).
    //
    // Notes on performance/memory requirements:
    //
    // - This implementation is a segregated storage algorithm and does not, unlike a segregated fit algorithm (aka buddy allocator), split and coalesce
    //   memory blocks. A segregated fit is well suited for a single threaded/lock-based implementation but would require multiple atomic operations to split
    //   or coalesce blocks.
    //
    // - All allocators share a common base instance of the backing allocator `Allocator`, which is used for allocation when the capacity is required to
    //   increase. Memory is only freed up when the tlsf allocator `deallocate_all` or destructor is invoked.
    //   Furthermore, `deallocate_all` is optional to declare in the backing allocator `Allocator` and, if declared, is invoked (once) instead of multiple `deallocate`
    //   calls when `deallocate_all` (or the destructor) is invoked on the tlsf allocator.
    //
    // - The allocator is constructed with only as many block allocators as required for the selected min-max range with linear_subdivisions.
    //   I.e. one allocator with (min_size, max_size, linear_subdivisions) 32,1024,8 has the same memory footprint as two with 32,512,8 and 513,1024,8.
    //   If either level only requires a single allocator providing a range, the code for calculating allocator indices is optimized away by template
    //   construction. Additionally, if the size is known at compile time (const or sizeof), the lookup can be optimized away by the compiler.
    //
    // - No overhead per allocation (no header information).
    //
    // - Internally, all memory block sizes must be rounded up to a multiple of alignment. I.e. if alignment is 64, buckets containing 96 byte size allocations
    //   will internally use 128 byte blocks. Additionally, the smallest size allocated will always be greater than or equal to `linear_subdivisions`.
    //
    // - The allocator relies on the free memory pool being persistent and read/write accessible, as link information of free memory blocks is
    //   read/written by the allocator operations.
    //
    // Examples:
    // Range is within a single pow2 range, no subdivisions. No lookup code needed.
    // using BlockAllocator = tlsf_allocator<17, 32, 1>;
    //
    // Range is within a single pow2 range with 8 subdivisions, so in this case with linear increments (128/8=16) of bucket sizes. Second level lookup only.
    // using SegregatedFitAllocatorLinear = tlsf_allocator<129, 256, 8>;
    //
    // Range is several pow2 ranges, no subdivisions, so pow2 size increments of bucket sizes.
    // using SegregatedFitAllocatorPow2 = tlsf_allocator<129, 2048, 1>;
    //
    // Range is several pow2 ranges, with 32 subdivisions each, so pow2 size increments where each pow2 contains an array of buckets with linear size
    // increments (pow2sz/32) of bucket sizes.
    // using TLSFAllocator = tlsf_allocator<129, 2048, 32>;
    //
    //
    // tlsf_allocator<size_t min_size, size_t max_size, size_t linear_subdivisions = 1, class Allocator = baselib::heap_allocator<>>
    //
    // min_size - valid minimum size of allocations.
    // max_size - valid maximum size of allocations. Must be less than or equal to the size addressable by integral type `size_t` divided by two plus 1.
    // linear_subdivisions - number of linear subdivisions of second level allocators (defaults to 1). Must be a power of two and less than or equal to `min_size`.
    // Allocator - Backing memory allocator. Defaults to baselib heap_allocator.
    //
    template<size_t min_size, size_t max_size, size_t linear_subdivisions = 1, class Allocator = baselib::heap_allocator<> >
    class tlsf_allocator : protected detail::tlsf_allocator<min_size, max_size, linear_subdivisions, Allocator>
    {
        using Impl = detail::tlsf_allocator<min_size, max_size, linear_subdivisions, Allocator>;

        static_assert(min_size <= max_size, "min_size > max_size");
        static_assert(min_size >= linear_subdivisions, "min_size < linear_subdivisions");
        static_assert(max_size <= std::numeric_limits<size_t>::max() / 2 + 1, "max_size > std::numeric_limits<size_t>::max() / 2 + 1");
        static_assert(baselib::Algorithm::IsPowerOfTwo(linear_subdivisions), "linear_subdivisions != pow2");

    public:
        // non-copyable
        tlsf_allocator(const tlsf_allocator& other) = delete;
        tlsf_allocator& operator=(const tlsf_allocator& other) = delete;

        // non-movable (strictly speaking not needed but listed to signal intent)
        tlsf_allocator(tlsf_allocator&& other) = delete;
        tlsf_allocator& operator=(tlsf_allocator&& other) = delete;

        // Allocated memory is guaranteed to always be aligned to at least the value of `alignment`.
        static constexpr uint32_t alignment = Impl::alignment;

        // Creates a new instance.
        tlsf_allocator()
        {
            atomic_thread_fence(memory_order_seq_cst);
        }

        // Destroy the allocator, deallocating any memory allocated.
        //
        // If there are other threads currently accessing the allocator, behavior is undefined.
        ~tlsf_allocator() {}

        // Allocates a memory block large enough to hold `size` number of bytes if the allocation does not require increasing capacity.
        //
        // \returns Address to memory block of allocated memory or nullptr if failed or outside of size range.
        void* try_allocate(size_t size)
        {
            return owns(nullptr, size) ? Impl::try_allocate(size) : nullptr;
        }

        // Allocates a memory block large enough to hold `size` number of bytes.
        //
        // \returns Address to memory block of allocated memory or nullptr if failed or outside of size range.
        void* allocate(size_t size)
        {
            return owns(nullptr, size) ? Impl::allocate(size) : nullptr;
        }

        // Reallocates previously allocated or reallocated memory pointed to by `ptr` from `old_size` to `new_size` number of bytes if the reallocation does not
        // require increasing capacity. Passing `nullptr` in `ptr` yields the same result as calling `try_allocate`.
        //
        // \returns Address to memory block of reallocated memory or nullptr if failed or if `new_size` is outside of size range.
        void* try_reallocate(void* ptr, size_t old_size, size_t new_size)
        {
            return owns(nullptr, new_size) ? Impl::try_reallocate(ptr, old_size, new_size) : nullptr;
        }

        // Reallocates previously allocated or reallocated memory pointed to by `ptr` from `old_size` to `new_size` number of bytes.
        // Passing `nullptr` in `ptr` yields the same result as calling `allocate`.
        //
        // \returns Address to memory block of reallocated memory or nullptr if failed or if `new_size` is outside of size range.
        void* reallocate(void* ptr, size_t old_size, size_t new_size)
        {
            return owns(nullptr, new_size) ? Impl::reallocate(ptr, old_size, new_size) : nullptr;
        }

        // Deallocates a memory block previously allocated or reallocated with `size`, pointed to by `ptr`.
        // Passing `nullptr` in `ptr` results in a no-op.
        //
        // \returns Always returns `true`
        bool deallocate(void* ptr, size_t size)
        {
            return Impl::deallocate(ptr, size);
        }

        // Free a linked list of allocations created using `batch_deallocate_link` with `size`.
        // `ptr_first` to `ptr_last` are the first and last allocations of a `batch_deallocate_link` series of calls.
        //
        // \returns Always returns `true`
        bool batch_deallocate(void* ptr_first, void* ptr_last, size_t size)
        {
            return Impl::batch_deallocate(ptr_first, ptr_last, size);
        }

        // Link previously allocated memory of `size` to another allocation.
        //
        // Use to create a linked list of allocations for use with `batch_deallocate(first, last, size)`.
        // Sizes of linked allocations are required to be equal to `size`.
        // `nullptr` is a valid argument for `ptr_next`, but is not needed to terminate a linked list.
        // This is an implicit transfer of ownership of the memory back to the allocator.
        // Memory of the allocation must not be accessed/modified once linked.
        void batch_deallocate_link(void* ptr, void* ptr_next, size_t size)
        {
            Impl::batch_deallocate_link(ptr, ptr_next);
        }

        // Release all resources and set capacity to zero.
        //
        // Calling this function invalidates any currently allocated memory.
        // If there are other threads currently accessing the allocator, behavior is undefined.
        void deallocate_all()
        {
            Impl::deallocate_all();
        }

        // Requests that the allocator capacity be at least enough to contain `capacity` allocations of `size`.
        //
        // If `capacity` is less than or equal to the current capacity for allocations of `size`, the capacity is not affected.
        // Note that internally, `capacity` is rounded up to the nearest optimal allocation size based on `Allocator` attributes.
        //
        // \returns true if successful.
        bool reserve(size_t size, size_t capacity)
        {
            return owns(nullptr, size) ? Impl::reserve(size, capacity) : false;
        }

        // Get the current capacity of allocations with `size`.
        size_t capacity(size_t size)
        {
            return owns(nullptr, size) ? Impl::capacity(size) : 0;
        }

        // Calculate the optimal allocation size given `size`.
        //
        // \returns Optimal size when allocating memory given `size`, or zero if outside of size range.
        static constexpr size_t optimal_size(const size_t size)
        {
            return owns(nullptr, size) ? Impl::optimal_size(size) : 0;
        }

        // Checks ownership of an allocation given `ptr` and `size`.
        // It is implementation defined if either or both of `ptr` and `size` are considered to determine ownership.
        // This allocator does not consider `ptr`.
        //
        // \returns True if the allocator owns the allocation.
        static constexpr bool owns(const void *, size_t size)
        {
            return size - min_size <= max_size - min_size;
        }
    };
}
}
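A usage sketch for the allocator above (not part of the vendored header); `FrameAllocator`, the chosen size range and the include path are assumptions, and the bucket arithmetic in the comment simply replays the 1500-byte example from the documentation.

// Hypothetical usage example, assuming the baselib Include folder is on the include path.
#include "Cpp/tlsf_allocator.h"

// With linear_subdivisions == 16, sizes 1473..1536 share one bucket, so a 1500-byte
// request is served from the 1536-byte bucket, matching the worked example above:
//   invSizeMask = ((1 << 10) / 16) - 1 = 63
//   lowerBound  = ((1500 - 1) & ~63) + 1 = 1473
//   upperBound  = 1473 + 63 = 1536
using FrameAllocator = baselib::tlsf_allocator<64, 4096, 16>;   // hypothetical size range, default heap_allocator backing

void ExampleTlsf()
{
    FrameAllocator alloc;

    size_t bucket = FrameAllocator::optimal_size(1500);   // expected to be 1536 per the example above
    (void)bucket;

    void* p = alloc.allocate(1500);     // O(1); may grow capacity for this size range on first use
    if (p)
        alloc.deallocate(p, 1500);      // deallocate with the size used for the allocation
}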