[add] first
This commit is contained in:
152
Libraries/external/baselib/Include/C/Internal/Baselib_CappedSemaphore_FutexBased.inl.h
vendored
Normal file
152
Libraries/external/baselib/Include/C/Internal/Baselib_CappedSemaphore_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemFutex.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "Only use this implementation on top of a proper futex, in all other situations use Baselib_CappedSemaphore_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
|
||||
// Space out to different cache lines.
// the idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and only touch wakeups.
// the only exception to that rule is if we hit a timeout.
typedef struct Baselib_CappedSemaphore
{
    // Wakeup tokens posted by Release/ResetAndReleaseWaitingThreads and
    // consumed by blocked acquirers; also the futex wait address.
    int32_t wakeups;
    char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
    // Available tokens; a negative value is the number of waiting threads.
    int32_t count;
    // Upper bound for count, fixed at creation time.
    const int32_t cap;
    char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) * 2]; // Having cap on the same cacheline is fine since it is a constant.
} Baselib_CappedSemaphore;
|
||||
|
||||
BASELIB_STATIC_ASSERT(sizeof(Baselib_CappedSemaphore) == PLATFORM_CACHE_LINE_SIZE * 2, "Baselib_CappedSemaphore (Futex) size should match 2*cacheline size (128bytes)");
|
||||
BASELIB_STATIC_ASSERT(offsetof(Baselib_CappedSemaphore, wakeups) ==
|
||||
(offsetof(Baselib_CappedSemaphore, count) - PLATFORM_CACHE_LINE_SIZE), "Baselib_CappedSemaphore (futex) wakeups and count shouldnt share cacheline");
|
||||
|
||||
|
||||
// Creates a capped semaphore with zero available tokens and the given cap.
BASELIB_INLINE_API Baselib_CappedSemaphore Baselib_CappedSemaphore_Create(const uint16_t cap)
{
    const Baselib_CappedSemaphore result = { 0, {0}, 0, cap, {0} };
    return result;
}
|
||||
|
||||
// Attempts to consume one wakeup token.
// Returns true on success, false when no token was available.
BASELIB_INLINE_API bool Detail_Baselib_CappedSemaphore_ConsumeWakeup(Baselib_CappedSemaphore* semaphore)
{
    int32_t available = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
    for (;;)
    {
        if (available <= 0)
            return false;
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &available, available - 1))
            return true;
        // CAS failure refreshed 'available'; retry with the new value.
    }
}
|
||||
|
||||
// Non-blocking acquire: takes one token if any is available.
// Returns true if a token was taken, false otherwise.
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryAcquire(Baselib_CappedSemaphore* semaphore)
{
    int32_t current = Baselib_atomic_load_32_relaxed(&semaphore->count);
    for (;;)
    {
        if (current <= 0)
            return false;
        if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &current, current - 1))
            return true;
        // CAS failure refreshed 'current'; retry with the new value.
    }
}
|
||||
|
||||
// Acquires one token, blocking on the futex until one becomes available.
BASELIB_INLINE_API void Baselib_CappedSemaphore_Acquire(Baselib_CappedSemaphore* semaphore)
{
    // Optimistically take a token; a positive previous count means we got one.
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return;

    // Count went negative, which registers us as a waiter. Sleep on the
    // wakeups word until a token posted by Release can be consumed.
    while (!Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
    {
        Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
    }
}
|
||||
|
||||
// Acquires one token, waiting up to timeoutInMilliseconds.
// Returns true if a token was acquired, false on timeout.
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryTimedAcquire(Baselib_CappedSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    // Optimistically take a token; a positive previous count means we got one.
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return true;

    if (Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
        return true;

    // Wait on the futex, re-checking for a wakeup token after every wake,
    // until the timeout budget is exhausted.
    uint32_t timeLeft = timeoutInMilliseconds;
    const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
    do
    {
        Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
        if (Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
            return true;
        timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
    }
    while (timeLeft);

    // When timeout occurs we need to make sure we do one of the following:
    // Increase count by one from a negative value (give our acquired token back) or consume a wakeup.
    //
    // If count is not negative it's likely we are racing with a release operation in which case we
    // may end up having a successful acquire operation.
    do
    {
        int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
        while (count < 0)
        {
            if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
                return false;
        }
        // Likely a race, yield to give the release operation room to complete.
        // This includes a full memory barrier which ensures that there is no reordering between changing/reading count and wakeup consumption.
        Baselib_Thread_YieldExecution();
    }
    while (!Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore));
    return true;
}
|
||||
|
||||
// Releases up to _count tokens, clamped so count never exceeds cap, and wakes
// as many blocked threads as tokens were actually made available.
// Returns the number of tokens actually released (0 if already at cap).
BASELIB_INLINE_API uint16_t Baselib_CappedSemaphore_Release(Baselib_CappedSemaphore* semaphore, const uint16_t _count)
{
    int32_t count = _count;
    int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
    do
    {
        if (previousCount == semaphore->cap)
            return 0;

        // Clamp so the new count cannot exceed the cap.
        if (previousCount + count > semaphore->cap)
            count = semaphore->cap - previousCount;
    }
    while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->count, &previousCount, previousCount + count));

    // A negative previous count is the number of threads blocked in Acquire;
    // post wakeup tokens and notify the futex for that many threads (at most
    // as many as tokens released).
    if (OPTIMIZER_UNLIKELY(previousCount < 0))
    {
        const int32_t waitingThreads = -previousCount;
        const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
        Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
        Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
    }
    return count;
}
|
||||
|
||||
// Sets count to zero and wakes every thread currently blocked in Acquire.
// Returns the number of threads that were woken.
BASELIB_INLINE_API uint32_t Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(Baselib_CappedSemaphore* semaphore)
{
    const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
    if (OPTIMIZER_LIKELY(count >= 0))
        return 0;
    // Negative count is the number of waiting threads; give each a wakeup token.
    const int32_t threadsToWakeup = -count;
    Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
    Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
    return threadsToWakeup;
}
|
||||
|
||||
// Debug-checks that no threads are still waiting; a null pointer is a no-op.
// The futex-based semaphore owns no external resources to release.
BASELIB_INLINE_API void Baselib_CappedSemaphore_Free(Baselib_CappedSemaphore* semaphore)
{
    if (!semaphore)
        return;
    const int32_t remaining = Baselib_atomic_load_32_seq_cst(&semaphore->count);
    BaselibAssert(remaining >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
|
||||
122
Libraries/external/baselib/Include/C/Internal/Baselib_CappedSemaphore_SemaphoreBased.inl.h
vendored
Normal file
122
Libraries/external/baselib/Include/C/Internal/Baselib_CappedSemaphore_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemSemaphore.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
#if PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "It's highly recommended to use Baselib_CappedSemaphore_FutexBased.inl.h on platforms which have native futex support"
|
||||
#endif
|
||||
|
||||
typedef struct Baselib_CappedSemaphore
{
    // Handle to the system semaphore created in-place in _systemSemaphoreData.
    Baselib_SystemSemaphore_Handle handle;
    // Available tokens; a negative value is the number of waiting threads.
    int32_t count;
    // Upper bound for count, fixed at creation time.
    const int32_t cap;
    // Make the capped semaphore take a full cache line so that if the user cacheline aligned semaphore,
    // llsc operations on count will not spuriously fail.
    char _cachelineSpacer[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) * 2 - sizeof(Baselib_SystemSemaphore_Handle)];
    char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_CappedSemaphore;
|
||||
|
||||
BASELIB_STATIC_ASSERT((offsetof(Baselib_CappedSemaphore, count) + PLATFORM_CACHE_LINE_SIZE - sizeof(Baselib_SystemSemaphore_Handle)) ==
|
||||
offsetof(Baselib_CappedSemaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
|
||||
|
||||
// Creates a capped semaphore with zero available tokens and the given cap.
// The backing system semaphore is created in-place inside the struct.
BASELIB_INLINE_API Baselib_CappedSemaphore Baselib_CappedSemaphore_Create(uint16_t cap)
{
    Baselib_CappedSemaphore semaphore = {{0}, 0, cap, {0}, {0}};
    semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
    return semaphore;
}
|
||||
|
||||
// Acquires one token, blocking on the system semaphore if none is available.
BASELIB_INLINE_API void Baselib_CappedSemaphore_Acquire(Baselib_CappedSemaphore* semaphore)
{
    // Optimistically take a token; a positive previous count means we got one.
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return;

    // Count went negative: wait for Release to post the system semaphore.
    Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
|
||||
|
||||
// Non-blocking acquire: takes one token if any is available.
// Returns true if a token was taken, false otherwise.
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryAcquire(Baselib_CappedSemaphore* semaphore)
{
    int32_t current = Baselib_atomic_load_32_relaxed(&semaphore->count);
    for (;;)
    {
        if (current <= 0)
            return false;
        if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &current, current - 1))
            return true;
        // CAS failure refreshed 'current'; retry with the new value.
    }
}
|
||||
|
||||
// Acquires one token, waiting up to timeoutInMilliseconds on the system
// semaphore. Returns true if a token was acquired, false on timeout.
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryTimedAcquire(Baselib_CappedSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    // Optimistically take a token; a positive previous count means we got one.
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return true;

    if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
        return true;

    // When timeout occurs we need to make sure we do one of the following:
    // Increase count by one from a negative value (give our acquired token back) or consume a wakeup.
    //
    // If count is not negative it's likely we are racing with a release operation in which case we
    // may end up having a successful acquire operation.
    do
    {
        int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
        while (count < 0)
        {
            if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
                return false;
        }
        // Likely a race, yield to give the release operation room to complete.
        // This includes a full memory barrier which ensures that there is no reordering between changing/reading count and wakeup consumption.
        Baselib_Thread_YieldExecution();
    }
    while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
    return true;
}
|
||||
|
||||
// Releases up to _count tokens, clamped so count never exceeds cap, and posts
// the system semaphore once per thread that should wake up.
// Returns the number of tokens actually released (0 if already at cap).
BASELIB_INLINE_API uint16_t Baselib_CappedSemaphore_Release(Baselib_CappedSemaphore* semaphore, const uint16_t _count)
{
    int32_t count = _count;
    int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
    do
    {
        if (previousCount == semaphore->cap)
            return 0;

        // Clamp so the new count cannot exceed the cap.
        if (previousCount + count > semaphore->cap)
            count = semaphore->cap - previousCount;
    }
    while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->count, &previousCount, previousCount + count));

    // A negative previous count is the number of threads blocked in Acquire;
    // release the system semaphore for that many (at most count) threads.
    if (OPTIMIZER_UNLIKELY(previousCount < 0))
    {
        const int32_t waitingThreads = -previousCount;
        const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
        Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
    }
    return count;
}
|
||||
|
||||
// Sets count to zero and wakes every thread currently blocked in Acquire.
// Returns the number of threads that were woken.
BASELIB_INLINE_API uint32_t Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(Baselib_CappedSemaphore* semaphore)
{
    const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
    if (OPTIMIZER_LIKELY(count >= 0))
        return 0;
    // Negative count is the number of waiting threads; post the system
    // semaphore once per waiter.
    const int32_t threadsToWakeup = -count;
    Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
    return threadsToWakeup;
}
|
||||
|
||||
// Debug-checks that no threads are still waiting, then destroys the in-place
// system semaphore. A null pointer is a no-op.
BASELIB_INLINE_API void Baselib_CappedSemaphore_Free(Baselib_CappedSemaphore* semaphore)
{
    if (!semaphore)
        return;
    const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
    BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
    Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}
|
||||
7
Libraries/external/baselib/Include/C/Internal/Baselib_EnumSizeCheck.h
vendored
Normal file
7
Libraries/external/baselib/Include/C/Internal/Baselib_EnumSizeCheck.h
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_StaticAssert.h"
|
||||
|
||||
// Compile-time check that _enumType occupies exactly 4 bytes; baselib's ABI
// relies on 32-bit enums across all compilers and language bindings.
#define BASELIB_ENUM_ENSURE_ABI_COMPATIBILITY(_enumType) \
    BASELIB_STATIC_ASSERT(sizeof(_enumType) == 4, \
        "Baselib assumes that sizeof any enum type is exactly 4 bytes, there might be ABI compatibility problems if violated");
|
||||
198
Libraries/external/baselib/Include/C/Internal/Baselib_EventSemaphore_FutexBased.inl.h
vendored
Normal file
198
Libraries/external/baselib/Include/C/Internal/Baselib_EventSemaphore_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,198 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemFutex.h"
|
||||
|
||||
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "Only use this implementation on top of a proper futex, in all other situations use Baselib_EventSemaphore_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
|
||||
typedef struct Baselib_EventSemaphore
{
    // Packed word: two flag bits (Set / Reset, see the Detail_* enum in this
    // file) plus a generation counter in the remaining bits. Also the futex
    // wait address.
    int32_t state;
    char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_EventSemaphore;
||||
|
||||
BASELIB_STATIC_ASSERT(sizeof(Baselib_EventSemaphore) == PLATFORM_CACHE_LINE_SIZE, "Baselib_EventSemaphore size should match cacheline size (64bytes)");
|
||||
|
||||
// The futex based event semaphore is in one of *three* states:
|
||||
// * ResetNoWaitingThreads: EventSemaphore blocks threads, but there aren't any blocked yet
|
||||
// * Reset: EventSemaphore blocks threads and there are some already
|
||||
// * Set: EventSemaphore is not blocking any acquiring threads
|
||||
//
|
||||
// The ResetNoWaitingThreads state is an optimization that allows us to avoid the (comparatively) costly futex notification syscalls.
|
||||
//
|
||||
// In addition, there is a generation counter baked into the state variable in order to prevent lock stealing.
|
||||
// -> Any change in the state during acquire (other than going from ResetNoWaitingThreads to Reset) means that the thread can continue
|
||||
// (since in this case either it was set on the current generation or the generation was changed which implies an earlier release operation)
|
||||
//
|
||||
// Allowed state transitions:
|
||||
// ResetNoWaitingThreads-Gen(X) -> Reset-Gen(X) == Acquire/TryTimedAcquire if no thread was waiting already
|
||||
// ResetNoWaitingThreads-Gen(X) -> Set-Gen(X) == Set but no thread was waiting
|
||||
// Reset-Gen(X) -> Set-Get(X+1) == Set if threads were waiting
|
||||
// Set-Get(X) -> ResetNoWaitingThreads-Gen(X) == Reset/ResetAndReleaseWaitingThreads
|
||||
// Reset-Gen(X) -> ResetNoWaitingThreads-Gen(X+1) == ResetAndReleaseWaitingThreads if threads were waiting
|
||||
//
|
||||
// Note how any state transition from Reset requires increasing the generation counter.
|
||||
|
||||
enum
{
    // A state word with neither flag bit set means ResetNoWaitingThreads.
    //Detail_Baselib_EventSemaphore_ResetNoWaitingThreads = 0,
    Detail_Baselib_EventSemaphore_Set = (uint32_t)1 << 30,
    Detail_Baselib_EventSemaphore_Reset = (uint32_t)2 << 30,
    // All bits below the two flag bits hold the generation counter.
    Detail_Baselib_EventSemaphore_GenMask = ~((uint32_t)(1 | 2) << 30)
};
|
||||
|
||||
// Extracts the generation-counter bits from a packed state word.
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_Generation(int32_t state)
{
    const uint32_t generation = state & Detail_Baselib_EventSemaphore_GenMask;
    return generation;
}
|
||||
|
||||
// If Detail_Baselib_EventSemaphore_ResetNoWaitingThreads is set, sets Detail_Baselib_EventSemaphore_Reset flag.
|
||||
// Returns last known state of the semaphore.
|
||||
// Does nothing if state changed while this function runs (that includes generation changes while attempting to set the ResetState!)
|
||||
// If Detail_Baselib_EventSemaphore_ResetNoWaitingThreads is set, sets Detail_Baselib_EventSemaphore_Reset flag.
// Returns last known state of the semaphore.
// Does nothing if state changed while this function runs (that includes generation changes while attempting to set the ResetState!)
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(Baselib_EventSemaphore* semaphore)
{
    int32_t state = Baselib_atomic_load_32_acquire(&semaphore->state);
    const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
    const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
    // CAS loop only runs while the word still equals the originally observed
    // ResetNoWaitingThreads-Gen(X); any concurrent change falls through.
    while (state == resetNoWaitingThreadsState)
    {
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->state, &state, resetState))
            return resetState;
    }
    return state;
}
|
||||
|
||||
// Creates an event semaphore in the ResetNoWaitingThreads state, generation 0.
BASELIB_INLINE_API Baselib_EventSemaphore Baselib_EventSemaphore_Create(void)
{
    const Baselib_EventSemaphore result = { 0, {0} };
    return result;
}
|
||||
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryAcquire(Baselib_EventSemaphore* semaphore)
|
||||
{
|
||||
const int32_t state = Baselib_atomic_load_32_acquire(&semaphore->state);
|
||||
return state & Detail_Baselib_EventSemaphore_Set ? true : false;
|
||||
}
|
||||
|
||||
// Blocks the calling thread until the semaphore is set (or the generation
// changes, which implies a release already happened).
BASELIB_INLINE_API void Baselib_EventSemaphore_Acquire(Baselib_EventSemaphore* semaphore)
{
    const int32_t state = Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(semaphore);
    if (state & Detail_Baselib_EventSemaphore_Set)
        return;
    do
    {
        // State is now in Detail_Baselib_EventSemaphore_Reset-Gen(X).
        Baselib_SystemFutex_Wait(&semaphore->state, state, UINT32_MAX);
        // If the state has changed in any way, it is now in either of
        // Set-Gen(X), Set-Gen(X+n), ResetNoWaitingThreads-Gen(X+n) or Reset(X+n). (with n>0)
        if (state != Baselib_atomic_load_32_relaxed(&semaphore->state))
            return;
    }
    while (true);
}
|
||||
|
||||
// Blocks until the semaphore is set or the timeout elapses.
// Returns true if acquired, false on timeout.
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryTimedAcquire(Baselib_EventSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    const int32_t state = Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(semaphore);
    if (state & Detail_Baselib_EventSemaphore_Set)
        return true;
    uint32_t timeLeft = timeoutInMilliseconds;
    const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
    do
    {
        // State is now in Detail_Baselib_EventSemaphore_Reset-Gen(X).
        Baselib_SystemFutex_Wait(&semaphore->state, state, timeLeft);
        // If the state has changed in any way, it is now in either of
        // Set-Gen(X), Set-Gen(X+n), ResetNoWaitingThreads-Gen(X+n) or Reset(X+n). (with n>0)
        if (state != Baselib_atomic_load_32_relaxed(&semaphore->state))
            return true;
        timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
    }
    while (timeLeft);

    // The EventSemaphore looks now like there are still threads waiting even if there *might* be none!
    // This is not an issue however, since it merely means that Set/ResetAndReleaseWaitingThreads will do a potentially redundant futex notification.

    return false;
}
|
||||
|
||||
// Moves the semaphore from Set back to ResetNoWaitingThreads (same
// generation). Any other current state already blocks acquirers, so only a
// Set state of the observed generation is transitioned.
BASELIB_INLINE_API void Baselib_EventSemaphore_Reset(Baselib_EventSemaphore* semaphore)
{
    int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
    const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
    while (state == setState)
    {
        const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
        if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, resetNoWaitingThreadsState))
            return;
    }
    // No CAS was performed; NOTE(review): fence presumably preserves the
    // function's release semantics on this path — confirm intent.
    Baselib_atomic_thread_fence_release();
}
|
||||
|
||||
// Sets the semaphore so acquiring threads pass through; wakes all waiters
// (bumping the generation) if any are blocked.
BASELIB_INLINE_API void Baselib_EventSemaphore_Set(Baselib_EventSemaphore* semaphore)
{
    int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
    const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
    const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;

    // If there is no thread waiting on the semaphore, there is no need to wake & increase the generation count.
    // Just set it to Set if it isn't already.
    while (state == resetNoWaitingThreadsState)
    {
        const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
        if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, setState))
            return;
    }
    // If this is not the case however, we do exactly that, increase the generation & wake all threads.
    while (state == resetState)
    {
        const int32_t nextGenSetState = Detail_Baselib_EventSemaphore_Generation(state + 1) | Detail_Baselib_EventSemaphore_Set;
        if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, nextGenSetState))
        {
            Baselib_SystemFutex_Notify(&semaphore->state, UINT32_MAX, Baselib_WakeupFallbackStrategy_All);
            return;
        }
    }
    // EventSemaphore was already in set state.
    Baselib_atomic_thread_fence_release();
}
|
||||
|
||||
// Releases all currently waiting threads and leaves the semaphore in a
// blocking (ResetNoWaitingThreads) state.
BASELIB_INLINE_API void Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(Baselib_EventSemaphore* semaphore)
{
    // Note that doing a Baselib_EventSemaphore_Set & Baselib_EventSemaphore_Reset has the same observable effects, just slightly slower.

    int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
    const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
    const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;

    // If there is no thread waiting on the semaphore, there is no need to wake & increase the generation count.
    // Just set it to ResetNoWaitingThreads if it isn't already.
    while (state == setState)
    {
        const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
        if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, resetNoWaitingThreadsState))
            return;
    }
    // If this is not the case however, we do exactly that, increase the generation & wake all threads.
    while (state == resetState)
    {
        const int32_t nextGenPendingResetState = Detail_Baselib_EventSemaphore_Generation(state + 1);
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->state, &state, nextGenPendingResetState))
        {
            Baselib_SystemFutex_Notify(&semaphore->state, UINT32_MAX, Baselib_WakeupFallbackStrategy_All);
            return;
        }
    }

    // EventSemaphore was already in ResetNoWaiting threads state.
    Baselib_atomic_thread_fence_release();
}
|
||||
|
||||
// Intentionally empty: the futex-based event semaphore owns no external
// resources that need releasing.
BASELIB_INLINE_API void Baselib_EventSemaphore_Free(Baselib_EventSemaphore* semaphore)
{
}
|
||||
211
Libraries/external/baselib/Include/C/Internal/Baselib_EventSemaphore_SemaphoreBased.inl.h
vendored
Normal file
211
Libraries/external/baselib/Include/C/Internal/Baselib_EventSemaphore_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,211 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemSemaphore.h"
|
||||
#include "../Baselib_StaticAssert.h"
|
||||
|
||||
#if PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "It's highly recommended to use Baselib_EventSemaphore_FutexBased.inl.h on platforms which have native futex support"
|
||||
#endif
|
||||
|
||||
// 64-bit state word, also addressable as two 32-bit halves so the flags half
// can be compare-exchanged on its own.
typedef union BASELIB_ALIGN_AS (8) Detail_Baselib_EventSemaphore_State
{
    struct
    {
        // Can be changed without checking for changes in numWaitingForSetInProgress (use 32bit cmpex)
        int32_t numWaitingForSetAndStateFlags;
        // Typically not changed without checking numWaitingForSetAndStateFlags (use 64bit cmpex)
        int32_t numWaitingForSetInProgress;
    } parts;
    int64_t stateInt64;
} Detail_Baselib_EventSemaphore_State;
|
||||
|
||||
enum
{
    // If this flag is set, threads are still waking up from a previous Set or ResetAndReleaseWaitingThreads call.
    // While this is set, any thread entering an Acquire method (that doesn't see Detail_Baselib_EventSemaphore_SetFlag),
    // will wait until it is cleared before proceeding with normal operations.
    Detail_Baselib_EventSemaphore_SetInProgressFlag = (uint32_t)1 << 30,

    // If this flag is set, threads acquiring the semaphore succeed immediately.
    Detail_Baselib_EventSemaphore_SetFlag = (uint32_t)2 << 30,

    // All bits below the two flag bits hold the number of threads waiting for Set.
    Detail_Baselib_EventSemaphore_NumWaitingForSetMask = ~((uint32_t)(1 | 2) << 30)
};
|
||||
|
||||
typedef struct Baselib_EventSemaphore
{
    Detail_Baselib_EventSemaphore_State state;
    // Handles to the two system semaphores created in-place below.
    // NOTE(review): presumably setSemaphore blocks acquirers waiting for Set and
    // setInProgressSemaphore blocks threads waiting out a pending wakeup —
    // confirm against Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet.
    Baselib_SystemSemaphore_Handle setSemaphore;
    Baselib_SystemSemaphore_Handle setInProgressSemaphore;
    char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - 2 * sizeof(Baselib_SystemSemaphore_Handle) - sizeof(Detail_Baselib_EventSemaphore_State)];
    char _systemSemaphoreDataSemaphore[Baselib_SystemSemaphore_PlatformSize];
    char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - Baselib_SystemSemaphore_PlatformSize];
    char _systemSemaphoreDataInProgressSemaphore[Baselib_SystemSemaphore_PlatformSize];
} Baselib_EventSemaphore;
|
||||
|
||||
BASELIB_STATIC_ASSERT((offsetof(Baselib_EventSemaphore, state) + PLATFORM_CACHE_LINE_SIZE) ==
|
||||
offsetof(Baselib_EventSemaphore, _systemSemaphoreDataSemaphore), "state and _systemSemaphoreDataSemaphore must not share cacheline");
|
||||
|
||||
BASELIB_STATIC_ASSERT((offsetof(Baselib_EventSemaphore, _systemSemaphoreDataSemaphore) + PLATFORM_CACHE_LINE_SIZE) ==
|
||||
offsetof(Baselib_EventSemaphore, _systemSemaphoreDataInProgressSemaphore), "_systemSemaphoreDataSemaphore and _systemSemaphoreDataInProgressSemaphore must not share cacheline");
|
||||
|
||||
// How (Timed)Acquire works for the SemaphoreBased EventSemaphore:
|
||||
//
|
||||
// If there is a set pending (Detail_Baselib_EventSemaphore_SetInProgressFlag is set),
|
||||
// it means that not all threads from the previous wakeup call (either via Set or ResetAndReleaseWaitingThreads) have been woken up.
|
||||
// If we would just continue, we might steal the wakeup tokens of those threads! So instead we wait until they are done.
|
||||
//
|
||||
// This is different from the FutexBased version, however there is no way for a user to distinguish that from
|
||||
// a "regular (but lengthy)" preemption at the start of the function.
|
||||
// Meaning that we don't care how often the semaphore got set and reset in the meantime!
|
||||
//
|
||||
//
|
||||
// Invariants:
|
||||
//
|
||||
// Allowed flag state transitions:
|
||||
// 0 -> Set | SetInProgress
|
||||
// Set | SetInProgress <-> Set
|
||||
// Set | SetInProgress <-> SetInProgress
|
||||
// Set -> 0
|
||||
// SetInProgress -> 0
|
||||
//
|
||||
// Additionally:
|
||||
// * numWaitingForSetInProgress can only grow if SetInProgress is set.
|
||||
// * numWaitingForSet can only grow if Set is set
|
||||
|
||||
#ifdef __cplusplus
|
||||
BASELIB_C_INTERFACE
|
||||
{
|
||||
#endif
|
||||
|
||||
BASELIB_API void Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet(int32_t initialNumWaitingForSetAndStateFlags, Baselib_EventSemaphore* semaphore);
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
BASELIB_API bool Detail_Baselib_EventSemaphore_SemaphoreBased_TryTimedAcquireNonSet(int32_t initialNumWaitingForSetAndStateFlags, Baselib_EventSemaphore* semaphore, uint32_t timeoutInMilliseconds);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // BASELIB_C_INTERFACE
|
||||
#endif
|
||||
|
||||
|
||||
// True when the Set flag is present in the packed flags word.
static FORCE_INLINE bool Detail_Baselib_EventSemaphore_IsSet(int32_t numWaitingForSetAndStateFlags)
{
    return (numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_SetFlag) != 0;
}
|
||||
|
||||
// True when the SetInProgress flag is present in the packed flags word.
static FORCE_INLINE bool Detail_Baselib_EventSemaphore_IsSetInProgress(int32_t numWaitingForSetAndStateFlags)
{
    return (numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_SetInProgressFlag) != 0;
}
|
||||
|
||||
// Extracts the waiting-thread count (the low, non-flag bits) from the packed
// flags word.
static FORCE_INLINE int32_t Detail_Baselib_EventSemaphore_GetWaitingForSetCount(int32_t numWaitingForSetAndStateFlags)
{
    return numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_NumWaitingForSetMask;
}
|
||||
|
||||
// Changes WaitingForSet count without affecting state flags
|
||||
static FORCE_INLINE int32_t Detail_Baselib_EventSemaphore_SetWaitingForSetCount(int32_t currentNumWaitingForSetAndStateFlags, int32_t newNumWaitingForSet)
|
||||
{
|
||||
return newNumWaitingForSet | (currentNumWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_NumWaitingForSetMask));
|
||||
}
|
||||
|
||||
// Creates an event semaphore in the reset state; both backing system
// semaphores are created in-place inside the struct.
BASELIB_INLINE_API Baselib_EventSemaphore Baselib_EventSemaphore_Create(void)
{
    Baselib_EventSemaphore semaphore = {{{0, 0}}, {0}, {0}, {0}, {0}, {0}, {0}};

    semaphore.setSemaphore = Baselib_SystemSemaphore_CreateInplace(semaphore._systemSemaphoreDataSemaphore);
    semaphore.setInProgressSemaphore = Baselib_SystemSemaphore_CreateInplace(semaphore._systemSemaphoreDataInProgressSemaphore);
    return semaphore;
}
|
||||
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryAcquire(Baselib_EventSemaphore* semaphore)
|
||||
{
|
||||
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
|
||||
return Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags);
|
||||
}
|
||||
|
||||
// Blocks until the semaphore is set; fast path returns immediately when the
// Set flag is already visible.
BASELIB_INLINE_API void Baselib_EventSemaphore_Acquire(Baselib_EventSemaphore* semaphore)
{
    const int32_t flags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
    if (Detail_Baselib_EventSemaphore_IsSet(flags))
        return;
    Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet(flags, semaphore);
}
|
||||
|
||||
// Acquire with timeout: returns true immediately if set, otherwise blocks for up to
// timeoutInMilliseconds waiting for the semaphore to become set. False on timeout.
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryTimedAcquire(Baselib_EventSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
    if (!Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags))
        return Detail_Baselib_EventSemaphore_SemaphoreBased_TryTimedAcquireNonSet(numWaitingForSetAndStateFlags, semaphore, timeoutInMilliseconds);
    return true;
}
|
||||
|
||||
// Clears the set flag so subsequent acquires block again.
// CAS loop with release ordering so writes made before Reset are visible to threads
// that subsequently observe the cleared state with an acquire load.
BASELIB_INLINE_API void Baselib_EventSemaphore_Reset(Baselib_EventSemaphore* semaphore)
{
    int32_t resetNumWaitingForSetAndStateFlags;
    int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
    do
    {
        // Preserve waiter count and remaining flags; drop only the SetFlag.
        resetNumWaitingForSetAndStateFlags = numWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_SetFlag);
    }
    while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
        &semaphore->state.parts.numWaitingForSetAndStateFlags,
        &numWaitingForSetAndStateFlags,
        resetNumWaitingForSetAndStateFlags));
}
|
||||
|
||||
// Sets the semaphore and wakes every thread currently waiting for Set.
BASELIB_INLINE_API void Baselib_EventSemaphore_Set(Baselib_EventSemaphore* semaphore)
{
    int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
    int32_t numWaitingForSetAndStateFlagsSet, numWaitingForSet;

    do
    {
        numWaitingForSetAndStateFlagsSet = numWaitingForSetAndStateFlags | Detail_Baselib_EventSemaphore_SetFlag;
        numWaitingForSet = Detail_Baselib_EventSemaphore_GetWaitingForSetCount(numWaitingForSetAndStateFlags);
        BaselibAssert(numWaitingForSet >= 0, "There needs to be always a non-negative amount of threads waiting for Set");
        // If there are waiters, flag that a set-release is in progress so concurrent
        // Set/ResetAndReleaseWaitingThreads calls don't release the same waiters twice.
        if (numWaitingForSet)
            numWaitingForSetAndStateFlagsSet |= Detail_Baselib_EventSemaphore_SetInProgressFlag;
    }
    while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
        &semaphore->state.parts.numWaitingForSetAndStateFlags,
        &numWaitingForSetAndStateFlags,
        numWaitingForSetAndStateFlagsSet));

    // Only the caller that transitioned SetInProgress from clear performs the wakeup.
    if (!Detail_Baselib_EventSemaphore_IsSetInProgress(numWaitingForSetAndStateFlags) && numWaitingForSet)
        Baselib_SystemSemaphore_Release(semaphore->setSemaphore, numWaitingForSet);
}
|
||||
|
||||
// Releases all threads currently waiting for Set while leaving the semaphore un-set.
BASELIB_INLINE_API void Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(Baselib_EventSemaphore* semaphore)
{
    // Note that doing a Baselib_EventSemaphore_Set & Baselib_EventSemaphore_Reset has the same observable effects, just slightly slower.

    int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
    int32_t resetNumWaitingForSetAndStateFlags, numWaitingForSet;
    do
    {
        // Clear the SetFlag; the waiters are released explicitly below instead.
        resetNumWaitingForSetAndStateFlags = numWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_SetFlag);
        numWaitingForSet = Detail_Baselib_EventSemaphore_GetWaitingForSetCount(numWaitingForSetAndStateFlags);
        BaselibAssert(numWaitingForSet >= 0, "There needs to be always a non-negative amount of threads waiting for Set");
        if (numWaitingForSet)
            resetNumWaitingForSetAndStateFlags |= Detail_Baselib_EventSemaphore_SetInProgressFlag;
    }
    while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
        &semaphore->state.parts.numWaitingForSetAndStateFlags,
        &numWaitingForSetAndStateFlags,
        resetNumWaitingForSetAndStateFlags));

    // Only the caller that transitioned SetInProgress from clear performs the wakeup.
    if (!Detail_Baselib_EventSemaphore_IsSetInProgress(numWaitingForSetAndStateFlags) && numWaitingForSet)
        Baselib_SystemSemaphore_Release(semaphore->setSemaphore, numWaitingForSet);
}
|
||||
|
||||
// Destroys both in-place system semaphores. NULL-safe no-op.
BASELIB_INLINE_API void Baselib_EventSemaphore_Free(Baselib_EventSemaphore* semaphore)
{
    if (!semaphore)
        return;

    Baselib_SystemSemaphore_FreeInplace(semaphore->setSemaphore);
    Baselib_SystemSemaphore_FreeInplace(semaphore->setInProgressSemaphore);
}
|
||||
150
Libraries/external/baselib/Include/C/Internal/Baselib_HighCapacitySemaphore_FutexBased.inl.h
vendored
Normal file
150
Libraries/external/baselib/Include/C/Internal/Baselib_HighCapacitySemaphore_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,150 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemFutex.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "Only use this implementation on top of a proper futex, in all other situations use Baselib_HighCapacitySemaphore_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
|
||||
// Space out to different cache lines.
|
||||
// the idea here is that threads waking up from sleep should not have to
|
||||
// access the cache line where count is stored, and only touch wakeups.
|
||||
// the only exception to that rule is if we hit a timeout.
|
||||
typedef struct Baselib_HighCapacitySemaphore
|
||||
{
|
||||
int32_t wakeups;
|
||||
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t)];
|
||||
int64_t count;
|
||||
char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t)];
|
||||
} Baselib_HighCapacitySemaphore;
|
||||
|
||||
// Creates a semaphore with zero tokens and zero pending wakeups.
BASELIB_INLINE_API Baselib_HighCapacitySemaphore Baselib_HighCapacitySemaphore_Create(void)
{
    Baselib_HighCapacitySemaphore semaphore = {0, {0}, 0, {0}};
    return semaphore;
}
|
||||
|
||||
// Attempts to atomically claim one pending wakeup token.
// Returns true if a wakeup was consumed, false if none were available.
BASELIB_INLINE_API bool Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(Baselib_HighCapacitySemaphore* semaphore)
{
    int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
    while (previousCount > 0)
    {
        // On CAS failure previousCount is reloaded with the current value; retry while positive.
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
            return true;
    }
    return false;
}
|
||||
|
||||
// Non-blocking acquire: takes a token only while count is positive (never makes it negative,
// so it never registers as a waiter). Acquire ordering on success pairs with Release.
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryAcquire(Baselib_HighCapacitySemaphore* semaphore)
{
    int64_t previousCount = Baselib_atomic_load_64_relaxed(&semaphore->count);
    while (previousCount > 0)
    {
        if (Baselib_atomic_compare_exchange_weak_64_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
            return true;
    }
    return false;
}
|
||||
|
||||
// Blocking acquire. Unconditionally decrements count; a non-positive result means
// no token was available and this thread is registered as a waiter (negative count
// == number of waiters). It then sleeps on the wakeups futex until it can consume a wakeup.
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Acquire(Baselib_HighCapacitySemaphore* semaphore)
{
    const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return;

    while (!Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore))
    {
        // Wait only while wakeups == 0; spurious wakeups are handled by the loop.
        Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
    }
}
|
||||
|
||||
// Acquire with timeout. Fast path decrements count; the slow path sleeps on the
// wakeups futex for the remaining time, and on timeout either hands its token back
// (count negative) or consumes a racing wakeup and still succeeds.
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryTimedAcquire(Baselib_HighCapacitySemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return true;

    uint32_t timeLeft = timeoutInMilliseconds;
    const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
    do
    {
        Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
        if (Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore))
            return true;
        timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
    }
    while (timeLeft);

    // When timeout occurs we need to make sure we do one of the following:
    // Increase count by one from a negative value (give our acquired token back) or consume a wakeup.
    //
    // If count is not negative it's likely we are racing with a release operation in which case we
    // may end up having a successful acquire operation.
    do
    {
        int64_t count = Baselib_atomic_load_64_relaxed(&semaphore->count);
        while (count < 0)
        {
            if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &count, count + 1))
                return false;
        }
        // Likely a race, yield to give the release operation room to complete.
        // This includes a fully memory barrier which ensures that there is no reordering between changing/reading count and wakeup consumption.
        Baselib_Thread_YieldExecution();
    }
    while (!Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore));
    return true;
}
|
||||
|
||||
// Releases _count tokens. If the previous count was negative there are sleeping
// waiters; publish that many wakeups (capped by _count) and notify the futex.
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Release(Baselib_HighCapacitySemaphore* semaphore, const uint32_t _count)
{
    const int64_t count = _count;
    int64_t previousCount = Baselib_atomic_fetch_add_64_release(&semaphore->count, count);

    // This should only be possible if millions of threads enter this function simultaneously posting with a high count.
    // See overflow protection below.
    BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", (int32_t)previousCount, (int32_t)count);

    if (OPTIMIZER_UNLIKELY(previousCount < 0))
    {
        const int64_t waitingThreads = -previousCount;
        const int64_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
        BaselibAssert(threadsToWakeup <= INT32_MAX);
        Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, (int32_t)threadsToWakeup);
        Baselib_SystemFutex_Notify(&semaphore->wakeups, (int32_t)threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
        return;
    }

    // overflow protection
    // we clamp count to MaxGuaranteedCount when count exceed MaxGuaranteedCount * 2
    // this way we won't have to do clamping on every iteration
    while (OPTIMIZER_UNLIKELY(previousCount > Baselib_HighCapacitySemaphore_MaxGuaranteedCount * 2))
    {
        const int64_t maxCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount;
        if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
            return;
    }
}
|
||||
|
||||
// Sets count to zero and wakes all current waiters (negative count == waiter count).
// Returns the number of threads woken.
BASELIB_INLINE_API uint64_t Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(Baselib_HighCapacitySemaphore* semaphore)
{
    const int64_t count = Baselib_atomic_exchange_64_release(&semaphore->count, 0);
    if (OPTIMIZER_LIKELY(count >= 0))
        return 0;
    const int64_t threadsToWakeup = -count;
    BaselibAssert(threadsToWakeup <= INT32_MAX);
    Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, (int32_t)threadsToWakeup);
    Baselib_SystemFutex_Notify(&semaphore->wakeups, (int32_t)threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
    return threadsToWakeup;
}
|
||||
|
||||
// No resources to release for the futex-based variant; only asserts that no thread
// is still waiting (count would be negative). NULL-safe no-op.
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Free(Baselib_HighCapacitySemaphore* semaphore)
{
    if (!semaphore)
        return;
    const int64_t count = Baselib_atomic_load_64_seq_cst(&semaphore->count);
    BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
|
||||
126
Libraries/external/baselib/Include/C/Internal/Baselib_HighCapacitySemaphore_SemaphoreBased.inl.h
vendored
Normal file
126
Libraries/external/baselib/Include/C/Internal/Baselib_HighCapacitySemaphore_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemSemaphore.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
#if PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "It's highly recommended to use Baselib_HighCapacitySemaphore_FutexBased.inl.h on platforms which have native futex support"
|
||||
#endif
|
||||
|
||||
// Semaphore-based fallback implementation: a 64-bit token counter plus a system
// semaphore used to block/wake waiting threads. The system semaphore storage is
// pushed to the next cache line so waiters don't contend on the count's line.
typedef struct Baselib_HighCapacitySemaphore
{
    int64_t count;
    Baselib_SystemSemaphore_Handle handle;
    char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t) - sizeof(Baselib_SystemSemaphore_Handle)];
    char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_HighCapacitySemaphore;

BASELIB_STATIC_ASSERT((offsetof(Baselib_HighCapacitySemaphore, count) + PLATFORM_CACHE_LINE_SIZE) ==
    offsetof(Baselib_HighCapacitySemaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
|
||||
|
||||
// Creates a semaphore with zero tokens; the system semaphore is created in-place
// in the struct's own storage and must be freed via Baselib_HighCapacitySemaphore_Free.
BASELIB_INLINE_API Baselib_HighCapacitySemaphore Baselib_HighCapacitySemaphore_Create(void)
{
    Baselib_HighCapacitySemaphore semaphore = {0, {0}, {0}, {0}};
    semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
    return semaphore;
}
|
||||
|
||||
// Non-blocking acquire: takes a token only while count is positive (never goes
// negative, so it never registers as a waiter).
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryAcquire(Baselib_HighCapacitySemaphore* semaphore)
{
    int64_t previousCount = Baselib_atomic_load_64_relaxed(&semaphore->count);
    while (previousCount > 0)
    {
        if (Baselib_atomic_compare_exchange_weak_64_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
            return true;
    }
    return false;
}
|
||||
|
||||
// Blocking acquire. A non-positive previous count means no token was available;
// this thread is now counted as a waiter and blocks on the system semaphore.
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Acquire(Baselib_HighCapacitySemaphore* semaphore)
{
    const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return;

    Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
|
||||
|
||||
// Acquire with timeout. Fast path decrements count; the slow path blocks on the
// system semaphore, and on timeout either hands its token back (count negative)
// or consumes a racing release and still succeeds.
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryTimedAcquire(Baselib_HighCapacitySemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return true;

    if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
        return true;

    // When timeout occurs we need to make sure we do one of the following:
    // Increase count by one from a negative value (give our acquired token back) or consume a wakeup.
    //
    // If count is not negative it's likely we are racing with a release operation in which case we
    // may end up having a successful acquire operation.
    do
    {
        int64_t count = Baselib_atomic_load_64_relaxed(&semaphore->count);
        while (count < 0)
        {
            if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &count, count + 1))
                return false;
        }
        // Likely a race, yield to give the release operation room to complete.
        // This includes a fully memory barrier which ensures that there is no reordering between changing/reading count and wakeup consumption.
        Baselib_Thread_YieldExecution();
    }
    while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
    return true;
}
|
||||
|
||||
// Releases _count tokens. If the previous count was negative there are blocked
// waiters; release min(_count, waiters) of them via the system semaphore.
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Release(Baselib_HighCapacitySemaphore* semaphore, const uint32_t _count)
{
    const int64_t count = _count;
    int64_t previousCount = Baselib_atomic_fetch_add_64_release(&semaphore->count, count);

    // This should only be possible if millions of threads enter this function simultaneously posting with a high count.
    // See overflow protection below.
    BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", (int32_t)previousCount, (int32_t)count);

    if (OPTIMIZER_UNLIKELY(previousCount < 0))
    {
        const int64_t waitingThreads = -previousCount;
        const int64_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
        BaselibAssert(threadsToWakeup <= (int64_t)UINT32_MAX);
        Baselib_SystemSemaphore_Release(semaphore->handle, (uint32_t)threadsToWakeup);
        return;
    }

    // overflow protection
    // we clamp count to MaxGuaranteedCount when count exceed MaxGuaranteedCount * 2
    // this way we won't have to do clamping on every iteration
    while (OPTIMIZER_UNLIKELY(previousCount > Baselib_HighCapacitySemaphore_MaxGuaranteedCount * 2))
    {
        const int64_t maxCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount;
        if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
            return;
    }
}
|
||||
|
||||
// Sets count to zero and releases all current waiters via the system semaphore.
// Returns the number of threads released.
BASELIB_INLINE_API uint64_t Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(Baselib_HighCapacitySemaphore* semaphore)
{
    const int64_t count = Baselib_atomic_exchange_64_release(&semaphore->count, 0);
    if (OPTIMIZER_LIKELY(count >= 0))
        return 0;
    const int64_t threadsToWakeup = -count;
    BaselibAssert(threadsToWakeup <= (int64_t)UINT32_MAX);
    Baselib_SystemSemaphore_Release(semaphore->handle, (uint32_t)threadsToWakeup);
    return threadsToWakeup;
}
|
||||
|
||||
// Asserts no thread is still waiting, then destroys the in-place system semaphore.
// NULL-safe no-op.
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Free(Baselib_HighCapacitySemaphore* semaphore)
{
    if (!semaphore)
        return;
    const int64_t count = Baselib_atomic_load_64_seq_cst(&semaphore->count);
    BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
    Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}
|
||||
92
Libraries/external/baselib/Include/C/Internal/Baselib_Lock_FutexBased.inl.h
vendored
Normal file
92
Libraries/external/baselib/Include/C/Internal/Baselib_Lock_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemFutex.h"
|
||||
|
||||
// Three-state futex lock protocol (classic Drepper-style mutex):
// UNLOCKED -> LOCKED on uncontended acquire; LOCKED -> CONTENDED when a second
// thread arrives, which tells Release that a futex notify is required.
enum Detail_Baselib_Lock_State
{
    Detail_Baselib_Lock_UNLOCKED = 0,
    Detail_Baselib_Lock_LOCKED = 1,
    Detail_Baselib_Lock_CONTENDED = 2,
};
|
||||
// Futex-based lock: a single state word padded out to a full cache line to avoid
// false sharing with adjacent data.
typedef struct Baselib_Lock
{
    int32_t state;
    char _cachelineSpacer[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_Lock;
|
||||
|
||||
// Creates a lock in the unlocked state.
BASELIB_INLINE_API Baselib_Lock Baselib_Lock_Create(void)
{
    Baselib_Lock lock = {Detail_Baselib_Lock_UNLOCKED, {0}};
    return lock;
}
|
||||
|
||||
// Non-blocking acquire: attempts UNLOCKED -> LOCKED and returns false as soon as
// the lock is observed held (LOCKED or CONTENDED).
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryAcquire(Baselib_Lock* lock)
{
    int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
    do
    {
        if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, Detail_Baselib_Lock_LOCKED))
            return true;
    }
    while (previousState == Detail_Baselib_Lock_UNLOCKED); // weak CAS may fail spuriously; retry only while still unlocked
    return false;
}
|
||||
|
||||
// Blocking acquire. Fast path: UNLOCKED -> LOCKED (or escalate LOCKED -> CONTENDED).
// Slow path: sleep on the futex while the state is CONTENDED; on wakeup, swap in
// CONTENDED — if the old value was UNLOCKED we now own the lock.
BASELIB_INLINE_API void Baselib_Lock_Acquire(Baselib_Lock* lock)
{
    int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
    do
    {
        // previousState + 1: UNLOCKED->LOCKED or LOCKED->CONTENDED.
        if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, previousState + 1))
            break;
    }
    while (previousState != Detail_Baselib_Lock_CONTENDED);

    while (OPTIMIZER_LIKELY(previousState != Detail_Baselib_Lock_UNLOCKED))
    {
        Baselib_SystemFutex_Wait(&lock->state, Detail_Baselib_Lock_CONTENDED, UINT32_MAX);
        // Assume contention on wakeup; acquires the lock iff it was UNLOCKED.
        previousState = Baselib_atomic_exchange_32_relaxed(&lock->state, Detail_Baselib_Lock_CONTENDED);
    }
}
|
||||
|
||||
COMPILER_WARN_UNUSED_RESULT
|
||||
BASELIB_INLINE_API bool Baselib_Lock_TryTimedAcquire(Baselib_Lock* lock, const uint32_t timeoutInMilliseconds)
|
||||
{
|
||||
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
|
||||
do
|
||||
{
|
||||
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, previousState + 1))
|
||||
break;
|
||||
}
|
||||
while (previousState != Detail_Baselib_Lock_CONTENDED);
|
||||
|
||||
if (OPTIMIZER_LIKELY(previousState == Detail_Baselib_Lock_UNLOCKED))
|
||||
return true;
|
||||
|
||||
uint32_t timeLeft = timeoutInMilliseconds;
|
||||
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
|
||||
do
|
||||
{
|
||||
Baselib_SystemFutex_Wait(&lock->state, Detail_Baselib_Lock_CONTENDED, timeoutInMilliseconds);
|
||||
const int32_t previousState = Baselib_atomic_exchange_32_relaxed(&lock->state, Detail_Baselib_Lock_CONTENDED);
|
||||
if (previousState == Detail_Baselib_Lock_UNLOCKED)
|
||||
return true;
|
||||
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
|
||||
}
|
||||
while (timeLeft);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Releases the lock with release ordering; only notifies the futex when the state
// was CONTENDED, i.e. at least one thread may be sleeping.
BASELIB_INLINE_API void Baselib_Lock_Release(Baselib_Lock* lock)
{
    const int32_t previousState = Baselib_atomic_exchange_32_release(&lock->state, Detail_Baselib_Lock_UNLOCKED);
    if (previousState == Detail_Baselib_Lock_CONTENDED)
        Baselib_SystemFutex_Notify(&lock->state, 1, Baselib_WakeupFallbackStrategy_OneByOne);
}
|
||||
|
||||
// The futex-based lock owns no external resources; nothing to free.
BASELIB_INLINE_API void Baselib_Lock_Free(Baselib_Lock* lock)
{
}
|
||||
46
Libraries/external/baselib/Include/C/Internal/Baselib_Lock_SemaphoreBased.inl.h
vendored
Normal file
46
Libraries/external/baselib/Include/C/Internal/Baselib_Lock_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_CappedSemaphore.h"
|
||||
|
||||
// Semaphore-based lock fallback: a capped semaphore with capacity 1; holding the
// single token means holding the lock.
typedef struct Baselib_Lock
{
    Baselib_CappedSemaphore semaphore;
} Baselib_Lock;
|
||||
|
||||
// Creates an unlocked lock: cap-1 capped semaphore seeded with its single token.
BASELIB_INLINE_API Baselib_Lock Baselib_Lock_Create(void)
{
    Baselib_Lock lock = { Baselib_CappedSemaphore_Create(1) };
    uint16_t submittedTokens = Baselib_CappedSemaphore_Release(&lock.semaphore, 1);
    BaselibAssert(submittedTokens == 1, "CappedSemaphore was unable to accept our token");
    return lock;
}
|
||||
|
||||
// Blocking acquire: takes the lock's single token.
BASELIB_INLINE_API void Baselib_Lock_Acquire(Baselib_Lock* lock)
{
    Baselib_CappedSemaphore_Acquire(&lock->semaphore);
}
|
||||
|
||||
// Non-blocking acquire: true iff the token was available.
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryAcquire(Baselib_Lock* lock)
{
    return Baselib_CappedSemaphore_TryAcquire(&lock->semaphore);
}
|
||||
|
||||
// Acquire with timeout: true iff the token was obtained within timeoutInMilliseconds.
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryTimedAcquire(Baselib_Lock* lock, const uint32_t timeoutInMilliseconds)
{
    return Baselib_CappedSemaphore_TryTimedAcquire(&lock->semaphore, timeoutInMilliseconds);
}
|
||||
|
||||
// Releases the lock by returning its token; the cap of 1 makes surplus releases harmless.
BASELIB_INLINE_API void Baselib_Lock_Release(Baselib_Lock* lock)
{
    Baselib_CappedSemaphore_Release(&lock->semaphore, 1);
}
|
||||
|
||||
// Frees the underlying capped semaphore. NULL-safe no-op.
BASELIB_INLINE_API void Baselib_Lock_Free(Baselib_Lock* lock)
{
    if (!lock)
        return;
    Baselib_CappedSemaphore_Free(&lock->semaphore);
}
|
||||
93
Libraries/external/baselib/Include/C/Internal/Baselib_ReentrantLock.inl.h
vendored
Normal file
93
Libraries/external/baselib/Include/C/Internal/Baselib_ReentrantLock.inl.h
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_Lock.h"
|
||||
#include "../Baselib_StaticAssert.h"
|
||||
#include "../Baselib_Alignment.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
// Recursive lock built on Baselib_Lock: 'owner' is the thread currently holding
// the lock (read with relaxed atomics for the recursion check), 'count' is the
// recursion depth and is only touched by the owning thread.
typedef struct Baselib_ReentrantLock
{
    Baselib_Lock lock;
    Baselib_Thread_Id owner;
    int32_t count;
} Baselib_ReentrantLock;

BASELIB_STATIC_ASSERT((BASELIB_ALIGN_OF(Baselib_ReentrantLock) + offsetof(Baselib_ReentrantLock, owner)) % sizeof(Baselib_Thread_Id) == 0, "Baselib_ReentrantLock::owner is not aligned for atomic use");
BASELIB_STATIC_ASSERT((BASELIB_ALIGN_OF(Baselib_ReentrantLock) + offsetof(Baselib_ReentrantLock, count)) % sizeof(int32_t) == 0, "Baselib_ReentrantLock::count is not aligned for atomic use");
|
||||
|
||||
// Creates an unlocked reentrant lock with no owner and zero recursion depth.
BASELIB_INLINE_API Baselib_ReentrantLock Baselib_ReentrantLock_Create(void)
{
    Baselib_ReentrantLock lock = {Baselib_Lock_Create(), Baselib_Thread_InvalidId, 0};
    return lock;
}
|
||||
|
||||
// Non-blocking acquire. If the caller already owns the lock, just bump the
// recursion depth; otherwise try to take the inner lock and become the owner.
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_ReentrantLock_TryAcquire(Baselib_ReentrantLock* lock)
{
    const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
    const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
    if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
    {
        if (!Baselib_Lock_TryAcquire(&lock->lock))
            return false;
        // Plain stores are safe here: we hold the inner lock.
        lock->owner = currentThreadId;
        lock->count = 1;
        return true;
    }
    // Re-entrant path: only the owner executes this, no atomics needed.
    lock->count++;
    return true;
}
|
||||
|
||||
// Blocking acquire. Re-entrant calls by the owner only increment the depth;
// other threads block on the inner lock and then record ownership.
BASELIB_INLINE_API void Baselib_ReentrantLock_Acquire(Baselib_ReentrantLock* lock)
{
    const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
    const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
    if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
    {
        Baselib_Lock_Acquire(&lock->lock);
        // Plain stores are safe here: we hold the inner lock.
        lock->owner = currentThreadId;
        lock->count = 1;
        return;
    }
    lock->count++;
}
|
||||
|
||||
// Acquire with timeout. Re-entrant calls succeed immediately; other threads wait
// up to timeoutInMilliseconds on the inner lock. False on timeout.
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_ReentrantLock_TryTimedAcquire(Baselib_ReentrantLock* lock, const uint32_t timeoutInMilliseconds)
{
    const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
    const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
    if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
    {
        if (!Baselib_Lock_TryTimedAcquire(&lock->lock, timeoutInMilliseconds))
            return false;
        // Plain stores are safe here: we hold the inner lock.
        lock->owner = currentThreadId;
        lock->count = 1;
        return true;
    }
    lock->count++;
    return true;
}
|
||||
|
||||
// Releases one level of recursion; the inner lock is released only when the depth
// reaches zero. Releasing an unheld lock (count == 0) is a silent no-op.
BASELIB_INLINE_API void Baselib_ReentrantLock_Release(Baselib_ReentrantLock* lock)
{
    if (lock->count > 0)
    {
        BaselibAssert(Baselib_atomic_load_ptr_relaxed(&lock->owner) == Baselib_Thread_GetCurrentThreadId(), "A recursive lock can only be unlocked by the locking thread");
        if (OPTIMIZER_LIKELY(lock->count == 1))
        {
            // Clear ownership before releasing so a new owner never observes stale state.
            lock->owner = Baselib_Thread_InvalidId;
            lock->count = 0;
            Baselib_Lock_Release(&lock->lock);
            return;
        }
        lock->count--;
    }
}
|
||||
|
||||
// Frees the inner lock. NULL-safe no-op.
BASELIB_INLINE_API void Baselib_ReentrantLock_Free(Baselib_ReentrantLock* lock)
{
    if (!lock)
        return;
    Baselib_Lock_Free(&lock->lock);
}
|
||||
152
Libraries/external/baselib/Include/C/Internal/Baselib_Semaphore_FutexBased.inl.h
vendored
Normal file
152
Libraries/external/baselib/Include/C/Internal/Baselib_Semaphore_FutexBased.inl.h
vendored
Normal file
@@ -0,0 +1,152 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_CountdownTimer.h"
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemFutex.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "Only use this implementation on top of a proper futex, in all other situations use Baselib_Semaphore_SemaphoreBased.inl.h"
|
||||
#endif
|
||||
|
||||
// Space out to different cache lines.
// the idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and only touch wakeups.
// the only exception to that rule is if we hit a timeout.
typedef struct Baselib_Semaphore
{
    int32_t wakeups;
    char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
    int32_t count;
    char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_Semaphore;

BASELIB_STATIC_ASSERT(sizeof(Baselib_Semaphore) == PLATFORM_CACHE_LINE_SIZE * 2, "Baselib_Semaphore (Futex) size should match 2*cacheline size (128bytes)");
BASELIB_STATIC_ASSERT(offsetof(Baselib_Semaphore, wakeups) ==
    (offsetof(Baselib_Semaphore, count) - PLATFORM_CACHE_LINE_SIZE), "Baselib_Semaphore (Futex) wakeups and count shouldnt share cacheline");
|
||||
|
||||
// Creates a semaphore with zero tokens and zero pending wakeups.
BASELIB_INLINE_API Baselib_Semaphore Baselib_Semaphore_Create(void)
{
    Baselib_Semaphore semaphore = {0, {0}, 0, {0}};
    return semaphore;
}
|
||||
|
||||
// Attempts to atomically claim one pending wakeup token.
// Returns true if a wakeup was consumed, false if none were available.
BASELIB_INLINE_API bool Detail_Baselib_Semaphore_ConsumeWakeup(Baselib_Semaphore* semaphore)
{
    int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
    while (previousCount > 0)
    {
        // On CAS failure previousCount is reloaded with the current value; retry while positive.
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
            return true;
    }
    return false;
}
|
||||
|
||||
// Non-blocking acquire: takes a token only while count is positive (never makes it
// negative, so it never registers as a waiter).
BASELIB_INLINE_API bool Baselib_Semaphore_TryAcquire(Baselib_Semaphore* semaphore)
{
    int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
    while (previousCount > 0)
    {
        if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
            return true;
    }
    return false;
}
|
||||
|
||||
// Blocking acquire. Unconditionally decrements count; a non-positive result means
// this thread is now registered as a waiter (negative count == number of waiters)
// and sleeps on the wakeups futex until it can consume a wakeup.
BASELIB_INLINE_API void Baselib_Semaphore_Acquire(Baselib_Semaphore* semaphore)
{
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return;

    while (!Detail_Baselib_Semaphore_ConsumeWakeup(semaphore))
    {
        // Wait only while wakeups == 0; spurious wakeups are handled by the loop.
        Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
    }
}
|
||||
|
||||
// Acquire with timeout. Fast path decrements count; the slow path sleeps on the
// wakeups futex for the remaining time, and on timeout either hands its token back
// (count negative) or consumes a racing wakeup and still succeeds.
BASELIB_INLINE_API bool Baselib_Semaphore_TryTimedAcquire(Baselib_Semaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return true;

    uint32_t timeLeft = timeoutInMilliseconds;
    const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
    do
    {
        Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
        if (Detail_Baselib_Semaphore_ConsumeWakeup(semaphore))
            return true;
        timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
    }
    while (timeLeft);

    // When timeout occurs we need to make sure we do one of the following:
    // Increase count by one from a negative value (give our acquired token back) or consume a wakeup.
    //
    // If count is not negative it's likely we are racing with a release operation in which case we
    // may end up having a successful acquire operation.
    do
    {
        int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
        while (count < 0)
        {
            if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
                return false;
        }
        // Likely a race, yield to give the release operation room to complete.
        // This includes a fully memory barrier which ensures that there is no reordering between changing/reading count and wakeup consumption.
        Baselib_Thread_YieldExecution();
    }
    while (!Detail_Baselib_Semaphore_ConsumeWakeup(semaphore));
    return true;
}
|
||||
|
||||
// Releases _count tokens. If the previous count was negative there are sleeping
// waiters; publish min(_count, waiters) wakeups and notify the futex.
BASELIB_INLINE_API void Baselib_Semaphore_Release(Baselib_Semaphore* semaphore, const uint16_t _count)
{
    const int32_t count = _count;
    int32_t previousCount = Baselib_atomic_fetch_add_32_release(&semaphore->count, count);

    // This should only be possible if thousands of threads enter this function simultaneously posting with a high count.
    // See overflow protection below.
    BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", previousCount, count);

    if (OPTIMIZER_UNLIKELY(previousCount < 0))
    {
        const int32_t waitingThreads = -previousCount;
        const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
        Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
        Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
        return;
    }

    // overflow protection
    // we clamp count to MaxGuaranteedCount when count exceed MaxGuaranteedCount * 2
    // this way we won't have to do clamping on every iteration
    while (OPTIMIZER_UNLIKELY(previousCount > Baselib_Semaphore_MaxGuaranteedCount * 2))
    {
        const int32_t maxCount = Baselib_Semaphore_MaxGuaranteedCount;
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
            return;
    }
}
|
||||
|
||||
// Resets the semaphore count to zero and wakes all currently waiting threads.
// Returns the number of threads that were woken up.
BASELIB_INLINE_API uint32_t Baselib_Semaphore_ResetAndReleaseWaitingThreads(Baselib_Semaphore* semaphore)
{
    const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
    // A non-negative count means no threads were waiting; nothing to wake.
    if (OPTIMIZER_LIKELY(count >= 0))
        return 0;
    // Negative count == number of waiters; grant them all wakeup tokens and notify everyone at once.
    const int32_t threadsToWakeup = -count;
    Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
    Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
    return threadsToWakeup;
}
|
||||
|
||||
// Tears down the semaphore. Passing NULL is a no-op. It is a programming error to free
// the semaphore while threads are still waiting on it (detected via a negative count).
BASELIB_INLINE_API void Baselib_Semaphore_Free(Baselib_Semaphore* semaphore)
{
    if (semaphore == NULL)
        return;

    // count < 0 encodes the number of threads still blocked on the semaphore.
    const int32_t currentCount = Baselib_atomic_load_32_seq_cst(&semaphore->count);
    BaselibAssert(currentCount >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
|
||||
126
Libraries/external/baselib/Include/C/Internal/Baselib_Semaphore_SemaphoreBased.inl.h
vendored
Normal file
126
Libraries/external/baselib/Include/C/Internal/Baselib_Semaphore_SemaphoreBased.inl.h
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
#pragma once
|
||||
|
||||
#include "../Baselib_Atomic_TypeSafe.h"
|
||||
#include "../Baselib_SystemSemaphore.h"
|
||||
#include "../Baselib_Thread.h"
|
||||
|
||||
|
||||
#if PLATFORM_FUTEX_NATIVE_SUPPORT
|
||||
#error "It's highly recommended to use Baselib_Semaphore_FutexBased.inl.h on platforms which has native semaphore support"
|
||||
#endif
|
||||
|
||||
// Semaphore backed by a system semaphore, for platforms without native futex support.
// 'count' tracks available tokens (a negative value is the number of waiting threads) and is
// kept on a different cacheline than the system semaphore storage to avoid false sharing
// between fast-path counter updates and the kernel object.
typedef struct Baselib_Semaphore
{
    Baselib_SystemSemaphore_Handle handle;  // handle referring to the in-place system semaphore below
    int32_t count;                          // token counter; negative = number of blocked threads
    char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) - sizeof(Baselib_SystemSemaphore_Handle)];
    char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];  // storage for the system semaphore object
} Baselib_Semaphore;

// Ensure 'count' and the system semaphore storage never share a cacheline.
BASELIB_STATIC_ASSERT((offsetof(Baselib_Semaphore, count) + PLATFORM_CACHE_LINE_SIZE - sizeof(Baselib_SystemSemaphore_Handle)) ==
    offsetof(Baselib_Semaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
|
||||
|
||||
// Creates a semaphore with an initial token count of zero.
BASELIB_INLINE_API Baselib_Semaphore Baselib_Semaphore_Create(void)
{
    Baselib_Semaphore semaphore = {{0}, 0, {0}, {0}};
    // NOTE(review): the system semaphore is created in-place against the local's
    // _systemSemaphoreData, and the struct is then returned by value. Presumably
    // Baselib_SystemSemaphore_CreateInplace's contract makes the handle/storage pair
    // survive the copy-out - confirm against that API's documentation.
    semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
    return semaphore;
}
|
||||
|
||||
// Non-blocking acquire: takes a token if and only if one is currently available.
// Returns true on success, false if no token could be taken.
BASELIB_INLINE_API bool Baselib_Semaphore_TryAcquire(Baselib_Semaphore* semaphore)
{
    // Only attempt the decrement while tokens are observed to be available.
    // On CAS failure 'observed' is refreshed with the current counter value.
    for (int32_t observed = Baselib_atomic_load_32_relaxed(&semaphore->count); observed > 0;)
    {
        if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &observed, observed - 1))
            return true;
    }
    return false;
}
|
||||
|
||||
// Acquires one token, blocking on the backing system semaphore until one is available.
BASELIB_INLINE_API void Baselib_Semaphore_Acquire(Baselib_Semaphore* semaphore)
{
    // Optimistically claim a token; a positive pre-decrement value means we got one
    // without touching the kernel object.
    const int32_t tokensBefore = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_UNLIKELY(tokensBefore <= 0))
    {
        // No token was available - block until a releasing thread posts the system semaphore.
        Baselib_SystemSemaphore_Acquire(semaphore->handle);
    }
}
|
||||
|
||||
// Tries to acquire one token, blocking on the system semaphore for up to
// timeoutInMilliseconds. Returns true if a token was acquired, false on timeout.
BASELIB_INLINE_API bool Baselib_Semaphore_TryTimedAcquire(Baselib_Semaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
    // Optimistically take a token; a positive previous value means one was available.
    const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
    if (OPTIMIZER_LIKELY(previousCount > 0))
        return true;

    // No token available - block on the system semaphore for the given timeout.
    if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
        return true;

    // When timeout occurs we need to make sure we do one of the following:
    // Increase count by one from a negative value (give our acquired token back) or consume a wakeup.
    //
    // If count is not negative it's likely we are racing with a release operation in which case we
    // may end up having a successful acquire operation.
    do
    {
        int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
        while (count < 0)
        {
            // CAS refreshes 'count' on failure, so the inner loop re-checks the fresh value.
            if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
                return false;
        }
        // Likely a race, yield to give the release operation room to complete.
        // This includes a full memory barrier which ensures that there is no reordering between changing/reading count and wakeup consumption.
        Baselib_Thread_YieldExecution();
    }
    while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
    return true;
}
|
||||
|
||||
// Releases (posts) _count tokens to the semaphore, waking up to that many waiting threads.
BASELIB_INLINE_API void Baselib_Semaphore_Release(Baselib_Semaphore* semaphore, const uint16_t _count)
{
    const int32_t count = _count;
    int32_t previousCount = Baselib_atomic_fetch_add_32_release(&semaphore->count, count);

    // This should only be possible if thousands of threads enter this function simultaneously posting with a high count.
    // See overflow protection below.
    BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", previousCount, count);

    // A negative previous count is the number of threads currently blocked on the
    // system semaphore: post it just enough times to wake them (capped at 'count').
    if (OPTIMIZER_UNLIKELY(previousCount < 0))
    {
        const int32_t waitingThreads = -previousCount;
        const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
        Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
        return;
    }

    // overflow protection
    // we clamp count to MaxGuaranteedCount when count exceed MaxGuaranteedCount * 2
    // this way we won't have to do clamping on every iteration
    while (OPTIMIZER_UNLIKELY(previousCount > Baselib_Semaphore_MaxGuaranteedCount * 2))
    {
        const int32_t maxCount = Baselib_Semaphore_MaxGuaranteedCount;
        // CAS refreshes previousCount on failure, so the loop condition re-checks the fresh value.
        if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
            return;
    }
}
|
||||
|
||||
// Resets the semaphore count to zero and wakes all currently waiting threads.
// Returns the number of threads that were woken up.
BASELIB_INLINE_API uint32_t Baselib_Semaphore_ResetAndReleaseWaitingThreads(Baselib_Semaphore* semaphore)
{
    const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
    // A non-negative count means no threads were waiting; nothing to wake.
    if (OPTIMIZER_LIKELY(count >= 0))
        return 0;
    // Negative count == number of waiters; post the system semaphore once per waiter.
    const int32_t threadsToWakeup = -count;

    Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
    return threadsToWakeup;
}
|
||||
|
||||
// Tears down the semaphore and releases the in-place system semaphore. Passing NULL is a
// no-op. It is a programming error to free the semaphore while threads are still waiting
// on it (detected via a negative count).
BASELIB_INLINE_API void Baselib_Semaphore_Free(Baselib_Semaphore* semaphore)
{
    if (semaphore == NULL)
        return;

    // count < 0 encodes the number of threads still blocked on the semaphore.
    const int32_t currentCount = Baselib_atomic_load_32_seq_cst(&semaphore->count);
    BaselibAssert(currentCount >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");

    Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}
|
||||
194
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_Gcc.h
vendored
Normal file
194
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_Gcc.h
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../../C/Baselib_Atomic.h"
|
||||
#include "../../../C/Baselib_Atomic_Macros.h"
|
||||
#include "Baselib_Atomic_Gcc_Apple_LLVM_Patch.h"
|
||||
|
||||
#if COMPILER_GCC && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 7))
|
||||
#pragma message "GNUC: " PP_STRINGIZE(__GNUC__) " GNUC_MINOR: " PP_STRINGIZE(__GNUC_MINOR__)
|
||||
#error "GCC is too old and/or missing compatible atomic built-in functions" PP_STRINGIZE(__GNUC__)
|
||||
#endif
|
||||
|
||||
// Map baselib memory order names onto the GCC/Clang __ATOMIC_* intrinsic constants.
#define detail_intrinsic_relaxed __ATOMIC_RELAXED
#define detail_intrinsic_acquire __ATOMIC_ACQUIRE
#define detail_intrinsic_release __ATOMIC_RELEASE
#define detail_intrinsic_acq_rel __ATOMIC_ACQ_REL
#define detail_intrinsic_seq_cst __ATOMIC_SEQ_CST

// Patch gcc and clang intrinsics to achieve a sequentially consistent barrier.
// As of writing (Clang 9, GCC 9) none of them produce a seq cst barrier for load-store operations.
// To fix this we switch load-store ops to acquire/release orders with a full final barrier.

// Memory orders used specifically for load-store (read-modify-write) operations.
#define detail_ldst_intrinsic_relaxed detail_intrinsic_relaxed
#define detail_ldst_intrinsic_acquire detail_intrinsic_acquire
#define detail_ldst_intrinsic_release detail_intrinsic_release
#define detail_ldst_intrinsic_acq_rel detail_intrinsic_acq_rel
#define detail_ldst_intrinsic_seq_cst detail_intrinsic_seq_cst

#if defined(__aarch64__)
// AArch64: demote seq_cst load-store ops to acq_rel and add an explicit trailing
// full fence (the *_seq_cst barrier macro); other orders need no extra barrier.
#undef detail_ldst_intrinsic_seq_cst
#define detail_ldst_intrinsic_seq_cst __ATOMIC_ACQ_REL
#define detail_AARCH64_SEQCST_PATCH_BARRIER_relaxed
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acquire
#define detail_AARCH64_SEQCST_PATCH_BARRIER_release
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acq_rel
#define detail_AARCH64_SEQCST_PATCH_BARRIER_seq_cst __extension__({__atomic_thread_fence (__ATOMIC_SEQ_CST); });
#else
// Other architectures: the intrinsics are sufficient, so every patch barrier expands to nothing.
#define detail_AARCH64_SEQCST_PATCH_BARRIER_relaxed
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acquire
#define detail_AARCH64_SEQCST_PATCH_BARRIER_release
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acq_rel
#define detail_AARCH64_SEQCST_PATCH_BARRIER_seq_cst
#endif
|
||||
|
||||
// Generator macros: each is expanded once per memory order (and integer width) by the
// Baselib_Atomic_FOR_EACH_* drivers below to emit one Baselib_atomic_* function.

// Emits Baselib_atomic_thread_fence_<order>().
#define detail_THREAD_FENCE(order, ...) \
static FORCE_INLINE void Baselib_atomic_thread_fence_##order(void) \
{ \
    __extension__({__atomic_thread_fence (detail_intrinsic_##order); }); \
} \

// Emits an atomic load from a const source object.
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
    __extension__({ __atomic_load((int_type*)obj, (int_type*)result, detail_intrinsic_##order); }); \
}

// Same as detail_LOAD but with a non-const source (used for 128-bit/ptr2x variants).
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
    __extension__({ __atomic_load((int_type*)obj, (int_type*)result, detail_intrinsic_##order); }); \
}

// Emits an atomic store.
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
    __extension__({ __atomic_store((int_type*)obj, (int_type*)value, detail_intrinsic_##order); }); \
}

// Emits a fetch-<op> read-modify-write (add/and/or/xor); uses the load-store order
// mapping plus the AArch64 seq_cst patch barrier.
#define detail_ALU(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
    *(int_type*)result = __extension__({ __atomic_##op((int_type*)obj, *(int_type*)value, detail_ldst_intrinsic_##order); });\
    detail_AARCH64_SEQCST_PATCH_BARRIER_##order; \
}

// Emits an atomic exchange.
#define detail_XCHG(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
    __extension__({ __atomic_exchange((int_type*)obj, (int_type*)value, (int_type*)result, detail_ldst_intrinsic_##order); });\
    detail_AARCH64_SEQCST_PATCH_BARRIER_##order; \
}

// Emits a weak compare-exchange; the Apple LLVM patch hook handles the 128-bit
// debug-build case before the intrinsic is reached.
#define detail_CMP_XCHG_WEAK(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
    detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value); \
    bool result = __extension__({ __atomic_compare_exchange( \
        (int_type*)obj, \
        (int_type*)expected, \
        (int_type*)value, \
        1, \
        detail_ldst_intrinsic_##order1, \
        detail_ldst_intrinsic_##order2); \
    }); \
    if (result) { detail_AARCH64_SEQCST_PATCH_BARRIER_##order1; } \
    else { detail_AARCH64_SEQCST_PATCH_BARRIER_##order2;} \
    return result; \
}

// Emits a strong compare-exchange (no spurious failure).
#define detail_CMP_XCHG_STRONG(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
    detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value); \
    bool result = __extension__ ({ __atomic_compare_exchange( \
        (int_type*)obj, \
        (int_type*)expected, \
        (int_type*)value, \
        0, \
        detail_ldst_intrinsic_##order1, \
        detail_ldst_intrinsic_##order2); \
    }); \
    if (result) { detail_AARCH64_SEQCST_PATCH_BARRIER_##order1; } \
    else { detail_AARCH64_SEQCST_PATCH_BARRIER_##order2;} \
    return result; \
}

// Placeholder for operations not emitted for a given type (expands to nothing).
#define detail_NOT_SUPPORTED(...)
|
||||
|
||||
// Instantiate thread fences for every memory order.
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
    detail_THREAD_FENCE
)

// Instantiate the full operation set for every standard integer width and memory order.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_TYPE(
    detail_LOAD, // load
    detail_STORE, // store
    detail_ALU, // add
    detail_ALU, // and
    detail_ALU, // or
    detail_ALU, // xor
    detail_XCHG, // exchange
    detail_CMP_XCHG_WEAK, // compare_exchange_weak
    detail_CMP_XCHG_STRONG, // compare_exchange_strong
)

#if PLATFORM_ARCH_64

// 64-bit targets: double-width (128-bit) atomics via __int128; ALU ops are not emitted.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
    detail_LOAD_NOT_CONST, // load
    detail_STORE, // store
    detail_NOT_SUPPORTED, // add
    detail_NOT_SUPPORTED, // and
    detail_NOT_SUPPORTED, // or
    detail_NOT_SUPPORTED, // xor
    detail_XCHG, // exchange
    detail_CMP_XCHG_WEAK, // compare_exchange_weak
    detail_CMP_XCHG_STRONG, // compare_exchange_strong
    128, 128, __int128 // type information
)

// Pointer-pair (2 x pointer-size) atomics, backed by the same 128-bit intrinsics.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
    detail_LOAD_NOT_CONST, // load
    detail_STORE, // store
    detail_NOT_SUPPORTED, // add
    detail_NOT_SUPPORTED, // and
    detail_NOT_SUPPORTED, // or
    detail_NOT_SUPPORTED, // xor
    detail_XCHG, // exchange
    detail_CMP_XCHG_WEAK, // compare_exchange_weak
    detail_CMP_XCHG_STRONG, // compare_exchange_strong
    ptr2x, 128, __int128 // type information
)
#else

// 32-bit targets: the pointer-pair type is 64 bits wide.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
    detail_LOAD_NOT_CONST, // load
    detail_STORE, // store
    detail_NOT_SUPPORTED, // add
    detail_NOT_SUPPORTED, // and
    detail_NOT_SUPPORTED, // or
    detail_NOT_SUPPORTED, // xor
    detail_XCHG, // exchange
    detail_CMP_XCHG_WEAK, // compare_exchange_weak
    detail_CMP_XCHG_STRONG, // compare_exchange_strong
    ptr2x, 64, int64_t // type information
)

#endif
|
||||
|
||||
// Clean up all helper macros so they do not leak out of this header.
#undef detail_intrinsic_relaxed
#undef detail_intrinsic_acquire
#undef detail_intrinsic_release
#undef detail_intrinsic_acq_rel
#undef detail_intrinsic_seq_cst

#undef detail_THREAD_FENCE
#undef detail_LOAD
#undef detail_LOAD_NOT_CONST
#undef detail_STORE
#undef detail_ALU
#undef detail_XCHG
#undef detail_CMP_XCHG_WEAK
#undef detail_CMP_XCHG_STRONG
#undef detail_NOT_SUPPORTED

// Undo the Apple LLVM 128-bit cmpxchg patch macros defined by the pre-include header.
#include "Baselib_Atomic_Gcc_Apple_LLVM_Patch_PostInclude.h"
|
||||
142
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_Gcc_Apple_LLVM_Patch.h
vendored
Normal file
142
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_Gcc_Apple_LLVM_Patch.h
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
#pragma once
|
||||
|
||||
#if PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
|
||||
|
||||
//
|
||||
// Patch for Apple LLVM version 8.x.x (clang-800.0.38 - clang-900.0.37) intrinsic 128-bit __atomic_compare_exchange implementation (debug, using opt level -O0).
|
||||
// Note that this patch is only in effect on tvOS/iOS AArch64 debug builds for Apple LLVM version 8.x.x. Arm32 verified working without patch.
|
||||
//
|
||||
// Problem:
|
||||
// For the above builds, the __atomic_compare_exchange asm expansion used SUBS/SBCS to compare the pair of "obj" and "expected" values.
|
||||
// SUBS/SBCS does not provide sufficient NZCV flags for comparing two 64-bit values.
|
||||
// The result is erroneous comparison of "obj" and "expected". Some examples:
|
||||
//
|
||||
// -- fails (lo != lo && hi == hi)
|
||||
// obj.lo = 5;
|
||||
// obj.hi = 10;
|
||||
// expected.lo = 3;
|
||||
// expected.hi = 10;
|
||||
//
|
||||
// -- works (expected.lo < 0)
|
||||
// obj.lo = 5;
|
||||
// obj.hi = 20;
|
||||
// expected.lo = -3;
|
||||
// expected.hi = 20;
|
||||
//
|
||||
// -- fails (obj.lo < 0 && hi == hi)
|
||||
// obj.lo = -5;
|
||||
// obj.hi = 30;
|
||||
// expected.lo = 3;
|
||||
// expected.hi = 30;
|
||||
//
|
||||
// -- fails (expected.lo < 0 && obj.hi+1 == expected.hi)
|
||||
// obj.lo = 5;
|
||||
// obj.hi = 3;
|
||||
// expected.lo = -3;
|
||||
// expected.hi = 2;
|
||||
//
|
||||
// Solution: Inline assembly replacement of __atomic_compare_exchange using the same approach as in release mode
|
||||
//
|
||||
// Note: This patch should be removed in its entirety once we require Apple LLVM version 9 (clang-900.0.37) or higher for building.
|
||||
//
|
||||
|
||||
// Weak 128-bit compare-exchange via hand-written AArch64 exclusive-pair asm.
// ld_instr/st_instr select the acquire/release flavour (ldxp/ldaxp, stxp/stlxp);
// barrier_instr optionally appends a full fence for seq_cst. Weak semantics: a failed
// store-conditional is reported as failure rather than retried. On failure the freshly
// loaded value is written back to 'expected', matching the intrinsic's contract.
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ld_instr, st_instr, barrier_instr) \
{ \
    register bool result asm ("w0"); \
    asm volatile \
    ( \
    "   ldp x12, x13, [%x4]           ; load expected          \n" \
    "   ldp x10, x11, [%x5]           ; load value             \n" \
    "   " #ld_instr " x9, x8, [%x3]   ; load obj               \n" \
    "   eor x13, x8, x13              ; compare to expected    \n" \
    "   eor x12, x9, x12                                       \n" \
    "   orr x12, x12, x13                                      \n" \
    "   cbnz x12, 0f                  ; not equal = no store   \n" \
    "   " #st_instr " w12, x10, x11, [%x0] ; try store         \n" \
    "   cbnz w12, 1f                                           \n" \
    "   orr w0, wzr, #0x1             ; success, result in w0  \n" \
    "   b 2f                                                   \n" \
    "0:                               ; no store               \n" \
    "   clrex                                                  \n" \
    "1:                               ; failed store           \n" \
    "   movz w0, #0                                            \n" \
    "2:                               ; store expected, fail   \n" \
    "   tbnz w0, #0, 3f                                        \n" \
    "   stp x9, x8, [%x1]                                      \n" \
    "3:                                                        \n" \
    "   " #barrier_instr "                                     \n" \
    \
    : "+r" (obj), "+r" (expected), "=r" (result) \
    : "r" (obj), "r" (expected), "r" (value) \
    : "x8", "x9", "x10", "x11", "x12", "x13", "cc", "memory"); \
    \
    return result != 0; \
}

// Per-memory-order-pair instantiations: load acquire -> ldaxp, store release -> stlxp,
// seq_cst additionally issues a trailing "dmb ish" full barrier.
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_relaxed_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_release_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_seq_cst(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
|
||||
|
||||
// Strong 128-bit compare-exchange: like the weak variant, but a failed
// store-conditional loops back (0b) and retries until the store succeeds or the
// comparison fails, so there are no spurious failures.
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ld_instr, st_instr, barrier_instr) \
{ \
    register bool result asm ("w0"); \
    asm volatile \
    ( \
    "   ldp x10, x11, [%x4]           ; load expected              \n" \
    "   ldp x12, x13, [%x5]           ; load value                 \n" \
    "0:                                                            \n" \
    "   " #ld_instr " x9, x8, [%x3]   ; load obj (ldxp/ldaxp)      \n" \
    "   eor x14, x8, x11              ; compare to expected        \n" \
    "   eor x15, x9, x10                                           \n" \
    "   orr x14, x15, x14                                          \n" \
    "   cbnz x14, 1f                  ; not equal = no store       \n" \
    "   " #st_instr " w14, x12, x13, [%x0] ; try store (stxp/stlxp)\n" \
    "   cbnz w14, 0b                  ; retry or store result in w0\n" \
    "   orr w0, wzr, #0x1                                          \n" \
    "   b 2f                                                       \n" \
    "1:                               ; no store                   \n" \
    "   movz w0, #0                                                \n" \
    "   clrex                                                      \n" \
    "2:                               ; store expected on fail     \n" \
    "   tbnz w0, #0, 3f                                            \n" \
    "   stp x9, x8, [%x1]                                          \n" \
    "3:                                                            \n" \
    "   " #barrier_instr "                                         \n" \
    \
    : "+r" (obj), "+r" (expected), "=r" (result) \
    : "r" (obj), "r" (expected), "r" (value) \
    : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "cc", "memory"); \
    \
    return result != 0; \
}

// Per-memory-order-pair instantiations, mirroring the weak variants above.
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_relaxed_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_release_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_seq_cst(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
|
||||
|
||||
// Hooks expanded inside the generic compare-exchange generators: when the operand
// type is 128-bit, divert to the inline-asm patch (which returns); all other sizes
// fall through to the regular intrinsic path.
#define detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value) \
if(sizeof(int_type) == 16) \
    detail_APPLE_LLVM_CMP_XCHG_WEAK_128_##order1##_##order2(obj, expected, value);

#define detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value) \
if(sizeof(int_type) == 16) \
    detail_APPLE_LLVM_CMP_XCHG_STRONG_128_##order1##_##order2(obj, expected, value);

#else // PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH

// Patch disabled: the hooks expand to nothing.
#define detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(...)
#define detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(...)

#endif
|
||||
@@ -0,0 +1,30 @@
|
||||
#pragma once

// Post-include cleanup: undefine every macro introduced by
// Baselib_Atomic_Gcc_Apple_LLVM_Patch.h so they do not leak past the atomic header.
#if PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH

#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_relaxed_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_release_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_seq_cst

#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_relaxed_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_release_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_seq_cst

#undef detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH
#undef detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH

#endif
|
||||
40
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_LLSC_Gcc.inl.h
vendored
Normal file
40
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_LLSC_Gcc.inl.h
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once

// Arm exclusive state access break implementation: clears this core's exclusive monitor.
#define detail_Baselib_atomic_llsc_break() __builtin_arm_clrex()

// Arm exclusive LLSC implementation using intrinsics.
// Load-links 'obj' into 'expected', runs 'code' (which is expected to produce 'value'),
// then store-conditionals 'value' back; repeats until the store-conditional succeeds.
// load_barrier/store_barrier are optional fence statements for the requested ordering.
#define detail_Baselib_atomic_llsc_arm_ts(obj, expected, value, code, ll_instr, sc_instr, load_barrier, store_barrier) \
do { \
    do { \
        *expected = __builtin_arm_##ll_instr(obj); \
        load_barrier; \
        code; \
    } while (__builtin_arm_##sc_instr(*value, obj)); \
    store_barrier; \
} while (false)

// Type-erased front-end: casts the void pointers to the requested integer type.
#define detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ll_instr, sc_instr, loadbarrier, storebarrier) \
    detail_Baselib_atomic_llsc_arm_ts((int_type*)((void*)obj), \
        (int_type*)((void*)expected), \
        (int_type*)((void*)value), \
        code, ll_instr, sc_instr, loadbarrier, storebarrier)

// Memory-order variants. AArch64 has acquire/release exclusives (ldaex/stlex);
// 32-bit Arm only has plain exclusives, so ordering is enforced with explicit dmb fences.
#define detail_Baselib_atomic_llsc_relaxed_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, ,)
#if PLATFORM_ARCH_64
#define detail_Baselib_atomic_llsc_acquire_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, strex, ,)
#define detail_Baselib_atomic_llsc_relaxed_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, stlex, ,)
#define detail_Baselib_atomic_llsc_acquire_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, stlex, ,)
#define detail_Baselib_atomic_llsc_seq_cst_seq_cst_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, stlex, , __builtin_arm_dmb(11) )
#else
#define detail_Baselib_atomic_llsc_acquire_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11), )
#define detail_Baselib_atomic_llsc_relaxed_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, ,__builtin_arm_dmb(11) )
#define detail_Baselib_atomic_llsc_acquire_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11) , __builtin_arm_dmb(11) )
#define detail_Baselib_atomic_llsc_seq_cst_seq_cst_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11) , __builtin_arm_dmb(11) )
#endif

// Size-keyed entry points: dispatch on integer bit width (int<size>_t) or __int128.
#define detail_Baselib_atomic_llsc_v(obj, expected, value, code, size, loadbarrier, storebarrier) \
    detail_Baselib_atomic_llsc_##loadbarrier##_##storebarrier##_v(obj, expected, value, code, int##size##_t)

#define detail_Baselib_atomic_llsc_128_v(obj, expected, value, code, loadbarrier, storebarrier) \
    detail_Baselib_atomic_llsc_##loadbarrier##_##storebarrier##_v(obj, expected, value, code, __int128)
||||
358
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_Msvc.h
vendored
Normal file
358
Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_Msvc.h
vendored
Normal file
@@ -0,0 +1,358 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../../C/Baselib_Atomic.h"
|
||||
#include "../../../C/Baselib_Atomic_Macros.h"
|
||||
|
||||
#include "Baselib_Atomic_MsvcIntrinsics.h"
|
||||
|
||||
// Memory-order pair selectors: detail_<X>_<Y>(...) expands its arguments only when
// X == Y. Combined with the dispatch helpers below, this lets generator macros emit
// a statement only for one specific memory order.
#define detail_relaxed_relaxed(...) __VA_ARGS__
#define detail_relaxed_acquire(...)
#define detail_relaxed_release(...)
#define detail_relaxed_acq_rel(...)
#define detail_relaxed_seq_cst(...)
#define detail_acquire_relaxed(...)
#define detail_acquire_acquire(...) __VA_ARGS__
#define detail_acquire_release(...)
#define detail_acquire_acq_rel(...)
#define detail_acquire_seq_cst(...)
#define detail_release_relaxed(...)
#define detail_release_acquire(...)
#define detail_release_release(...) __VA_ARGS__
#define detail_release_acq_rel(...)
#define detail_release_seq_cst(...)
#define detail_acq_rel_relaxed(...)
#define detail_acq_rel_acquire(...)
#define detail_acq_rel_release(...)
#define detail_acq_rel_acq_rel(...) __VA_ARGS__
#define detail_acq_rel_seq_cst(...)
#define detail_seq_cst_relaxed(...)
#define detail_seq_cst_acquire(...)
#define detail_seq_cst_release(...)
#define detail_seq_cst_acq_rel(...)
#define detail_seq_cst_seq_cst(...) __VA_ARGS__


// Dispatch helpers: detail_<order>(memory_order, ...) expands the arguments when
// memory_order matches <order>, otherwise expands to nothing.
#define detail_relaxed(memory_order, ...) detail_relaxed_##memory_order(__VA_ARGS__)
#define detail_acquire(memory_order, ...) detail_acquire_##memory_order(__VA_ARGS__)
#define detail_release(memory_order, ...) detail_release_##memory_order(__VA_ARGS__)
#define detail_acq_rel(memory_order, ...) detail_acq_rel_##memory_order(__VA_ARGS__)
#define detail_seq_cst(memory_order, ...) detail_seq_cst_##memory_order(__VA_ARGS__)
|
||||
|
||||
// Intel
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)

// x86/x64 interlocked intrinsics have no memory-order-suffixed variants
// (the unsuffixed ones are used for every order), so all suffixes are empty.
#define detail_intrinsic_relaxed
#define detail_intrinsic_acquire
#define detail_intrinsic_release
#define detail_intrinsic_acq_rel
#define detail_intrinsic_seq_cst

#if defined(_M_X64)

// Thread fence: acquire/release/acq_rel only use a compiler barrier
// (_ReadWriteBarrier); seq_cst additionally issues __faststorefence,
// a real hardware fence available on x64.
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
    detail_acquire(order, _ReadWriteBarrier()); \
    detail_release(order, _ReadWriteBarrier()); \
    detail_acq_rel(order, _ReadWriteBarrier()); \
    detail_seq_cst(order, __faststorefence()); \
}

#else // #defined(_M_IX86)

// 32-bit x86 has no __faststorefence; for seq_cst a locked exchange on a
// dummy stack slot (bracketed by compiler barriers) serves as the full fence.
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
    detail_acquire(order, _ReadWriteBarrier()); \
    detail_release(order, _ReadWriteBarrier()); \
    detail_acq_rel(order, _ReadWriteBarrier()); \
    detail_seq_cst(order, _ReadWriteBarrier(); __int32 temp = 0; _InterlockedExchange32(&temp, 0); _ReadWriteBarrier()); \
}

#endif

// Raw loads: plain volatile reads of the given width.
#define detail_LOAD_BITS_8(obj, result) *(__int8*)result = *(const volatile __int8*)obj
#define detail_LOAD_BITS_16(obj, result) *(__int16*)result = *(const volatile __int16*)obj
#define detail_LOAD_BITS_32(obj, result) *(__int32*)result = *(const volatile __int32*)obj
#if PLATFORM_ARCH_64
#define detail_LOAD_BITS_64(obj, result) *(__int64*)result = *(const volatile __int64*)obj
#else
// x86 32-bit load/store 64-bit integer: going through 'double' makes MSVC
// emit a single 8-byte move.
// - SSE2 enabled yields (identical to __mm_store/load):
//     movsd xmm0, QWORD PTR unsigned __int64 obj
//     movsd QWORD PTR unsigned __int64 result, xmm0
// - No SSE2 enabled yields:
//     fld QWORD PTR unsigned __int64 obj
//     fstp QWORD PTR unsigned __int64 result
// Link comparing various implementations: https://godbolt.org/z/T3zW5M
#define detail_LOAD_BITS_64(obj, result) *(double*)result = *(const volatile double*)obj
#endif

// Atomic load generator. After the raw load, acquire/seq_cst add a compiler
// barrier so the compiler cannot hoist later accesses above the load.
// NOTE(review): uses FORCE_INLINE whereas the fence macros above use
// COMPILER_FORCEINLINE — confirm both expand to the same attribute.
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
    detail_LOAD_BITS_##bits(obj, result); \
    detail_acquire(order, _ReadWriteBarrier()); \
    detail_seq_cst(order, _ReadWriteBarrier()); \
}

// Same as detail_LOAD but with a non-const 'obj' (used below for the ptr2x
// instantiation, whose generic signature takes a mutable pointer).
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
    detail_LOAD_BITS_##bits(obj, result); \
    detail_acquire(order, _ReadWriteBarrier()); \
    detail_seq_cst(order, _ReadWriteBarrier()); \
}

// Raw stores: plain volatile writes of the given width.
#define detail_STORE_BITS_8(obj, value) *(volatile __int8*)obj = *(const __int8*)value
#define detail_STORE_BITS_16(obj, value) *(volatile __int16*)obj = *(const __int16*)value
#define detail_STORE_BITS_32(obj, value) *(volatile __int32*)obj = *(const __int32*)value
#if PLATFORM_ARCH_64
#define detail_STORE_BITS_64(obj, value) *(volatile __int64*)obj = *(const __int64*)value
#else
// x86 32-bit: go through 'double' so MSVC emits a single 8-byte store
// (movsd with SSE2, fld/fstp otherwise), mirroring detail_LOAD_BITS_64.
// Fix: read the source through a const-qualified pointer — 'value' arrives
// as 'const void*' in detail_STORE, and detail_STORE_BITS_8/16/32 already
// use a const source cast; the original cast away const here.
#define detail_STORE_BITS_64(obj, value) *(volatile double*)obj = *(const double*)value
#endif
|
||||
|
||||
// Atomic store generator. relaxed: plain volatile store. release: store
// followed by a compiler barrier. seq_cst: locked exchange (full hardware
// barrier), discarding the previous value.
// NOTE(review): for release the _ReadWriteBarrier is emitted after the
// store; release ordering conventionally needs the compiler barrier before
// the store — presumably this relies on MSVC volatile-store semantics
// (/volatile:ms). Confirm.
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
    detail_relaxed(order, detail_STORE_BITS_##bits(obj, value)); \
    detail_release(order, detail_STORE_BITS_##bits(obj, value); _ReadWriteBarrier()); \
    detail_seq_cst(order, _InterlockedExchange##bits((__int##bits*)obj, *(const __int##bits*)value)); \
}
// ARM
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#elif defined(_M_ARM) || defined(_M_ARM64)

// MSVC provides _nf (no fence), _acq and _rel suffixed interlocked
// intrinsics on ARM; acq_rel and seq_cst fall through to the unsuffixed
// (fully fenced) intrinsic via an empty suffix.
#define detail_intrinsic_relaxed _nf
#define detail_intrinsic_acquire _acq
#define detail_intrinsic_release _rel
#define detail_intrinsic_acq_rel
#define detail_intrinsic_seq_cst

// Thread fence: every non-relaxed order issues an inner-shareable data
// memory barrier.
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
    detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
    detail_release(order, __dmb(_ARM_BARRIER_ISH)); \
    detail_acq_rel(order, __dmb(_ARM_BARRIER_ISH)); \
    detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}

// Atomic load generator: __iso_volatile_load* performs a raw volatile load
// without the implicit barriers of /volatile:ms; acquire/seq_cst add an
// explicit dmb after the load.
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
    *(__int##bits*)result = __iso_volatile_load##bits((const __int##bits*)obj); \
    detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
    detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}

// Same as detail_LOAD but with a non-const 'obj' (for the ptr2x variants).
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
    *(__int##bits*)result = __iso_volatile_load##bits((const __int##bits*)obj); \
    detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
    detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}

// Atomic store generator: release/seq_cst fence before the store; seq_cst
// also fences after it to keep the store ordered with later accesses.
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
    detail_release(order, __dmb(_ARM_BARRIER_ISH)); \
    detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
    __iso_volatile_store##bits((__int##bits*) obj, *(const __int##bits*)value); \
    detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}

#endif
||||
// Common
// ------------------------------------------------------------------------------------------------------------------------------------------------------

// Map generic op names onto the MSVC interlocked intrinsic families; the
// generators below paste these with a bit width and (on ARM) an order
// suffix, e.g. fetch_add + 32 + _acq -> _InterlockedExchangeAdd32_acq.
#define detail_intrinsic_exchange _InterlockedExchange
#define detail_intrinsic_fetch_add _InterlockedExchangeAdd
#define detail_intrinsic_fetch_and _InterlockedAnd
#define detail_intrinsic_fetch_or _InterlockedOr
#define detail_intrinsic_fetch_xor _InterlockedXor
||||
// Read-modify-write ops (exchange/add/and/or/xor): dispatch to the
// width- and (on ARM) order-suffixed interlocked intrinsic and hand the
// previous value back through 'result'.
// Fix: '##' must not paste 'bits' with '*' — the paste does not form a
// single valid preprocessing token (undefined behavior; clang/gcc reject
// it, only MSVC's traditional preprocessor tolerates it). Plain adjacency
// is what is intended and matches detail_STORE above.
#define detail_LOAD_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
    *(__int##bits*)result = PP_CONCAT(detail_intrinsic_##op, bits, detail_intrinsic_##order)((__int##bits*)obj, *(const __int##bits*)value); \
}
// Compare-exchange: returns true on success; on failure writes the observed
// value back into 'expected' and returns false. The failure order 'order2'
// is intentionally ignored — the success-order intrinsic is at least as
// strong.
// Fix: '##' must not paste 'bits' with '*' — the paste does not form a
// single valid preprocessing token (undefined behavior; clang/gcc reject
// it). Plain adjacency is what is intended and matches detail_STORE above.
#define detail_CMP_XCHG(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
    __int##bits cmp = *(__int##bits*)expected; \
    __int##bits result = PP_CONCAT(_InterlockedCompareExchange, bits, detail_intrinsic_##order1)((__int##bits*)obj, *(__int##bits*)value, cmp); \
    return result == cmp ? true : (*(__int##bits*)expected = result, false); \
}
||||
// Expands to nothing — used for ops a given instantiation does not support.
#define detail_NOT_SUPPORTED(...)

// Setup implementation
// ------------------------------------------------------------------------------------------------------------------------------------------------------

// Instantiate Baselib_atomic_thread_fence_<order> for every memory order.
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
    detail_THREAD_FENCE
)

// Instantiate every atomic op for every memory order and integer width.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_TYPE(
    detail_LOAD, // load
    detail_STORE, // store
    detail_LOAD_STORE, // add
    detail_LOAD_STORE, // and
    detail_LOAD_STORE, // or
    detail_LOAD_STORE, // xor
    detail_LOAD_STORE, // exchange
    detail_CMP_XCHG, // compare_exchange_weak
    detail_CMP_XCHG // compare_exchange_strong
)
|
||||
#if PLATFORM_ARCH_64

// 128-bit implementation
// There are more efficient ways of doing load, store and exchange on Arm64. Unfortunately MSVC doesn't provide intrinsics for those. The specific
// instructions needed to perform atomic load, store and exchange are also not available on MSVC.
// Hence we fallback to cmpxchg for all atomic ops.
// ------------------------------------------------------------------------------------------------------------------------------------------------------

// 128-bit load via cmpxchg: passing 'result' as both expected and desired
// either stores back the value already present or fails — in both cases
// 'result' ends up holding the current 128-bit value.
// NOTE(review): this issues a (potentially writing) cmpxchg even for a pure
// load, so 'obj' must live in writable memory — confirm callers guarantee
// that.
#define detail_LOAD128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
    Baselib_atomic_compare_exchange_weak_128_##order##_##order##_v((void*)obj, result, result); \
}

// 128-bit store: seed the comparand with a (non-atomic) read of the current
// value, then CAS until the store sticks. A failed CAS refreshes the
// comparand with the observed value, so the loop converges.
#define detail_STORE128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
    uint64_t comparand[2] = { ((volatile uint64_t*)obj)[0], ((volatile uint64_t*)obj)[1] }; \
    while(!Baselib_atomic_compare_exchange_weak_128_##order##_relaxed_v(obj, comparand, value)) \
        ; \
}

// 128-bit exchange: like detail_STORE128, but 'result' doubles as the
// comparand, so after the loop it holds the previous value.
#define detail_XCHG128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
    ((uint64_t*)result)[0] = ((volatile uint64_t*)obj)[0]; \
    ((uint64_t*)result)[1] = ((volatile uint64_t*)obj)[1]; \
    while(!Baselib_atomic_compare_exchange_weak_128_##order##_relaxed_v(obj, result, value)) \
        ; \
}

// 128-bit compare-exchange on _InterlockedCompareExchange128: the desired
// value is passed as (high, low) 64-bit halves, the intrinsic returns 1 on
// success and writes the observed value into 'expected' on failure. The
// failure order 'order2' is ignored (same rationale as detail_CMP_XCHG).
#define detail_CMP_XCHG128(op, order1, order2, id, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
    return PP_CONCAT(_InterlockedCompareExchange128, detail_intrinsic_##order1)( \
        (__int64*)obj, \
        ((const __int64*)value)[1], \
        ((const __int64*)value)[0], \
        (__int64*)expected \
    ) == 1; \
}

// Instantiate the 128-bit ops (arithmetic/bitwise ops are unsupported).
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
    detail_LOAD128, // load
    detail_STORE128, // store
    detail_NOT_SUPPORTED, // add
    detail_NOT_SUPPORTED, // and
    detail_NOT_SUPPORTED, // or
    detail_NOT_SUPPORTED, // xor
    detail_XCHG128, // exchange
    detail_CMP_XCHG128, // compare_exchange_weak
    detail_CMP_XCHG128, // compare_exchange_strong
    128
)

// Same set again under the 'ptr2x' (double-pointer-width) aliases.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
    detail_LOAD128, // load
    detail_STORE128, // store
    detail_NOT_SUPPORTED, // add
    detail_NOT_SUPPORTED, // and
    detail_NOT_SUPPORTED, // or
    detail_NOT_SUPPORTED, // xor
    detail_XCHG128, // exchange
    detail_CMP_XCHG128, // compare_exchange_weak
    detail_CMP_XCHG128, // compare_exchange_strong
    ptr2x
)

#undef detail_LOAD128
#undef detail_STORE128
#undef detail_XCHG128
#undef detail_CMP_XCHG128

#else

// 32-bit targets: 'ptr2x' (double-pointer-width) is 64 bits wide and is
// covered by the scalar 64-bit implementations above.
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
    detail_LOAD_NOT_CONST, // load
    detail_STORE, // store
    detail_NOT_SUPPORTED, // add
    detail_NOT_SUPPORTED, // and
    detail_NOT_SUPPORTED, // or
    detail_NOT_SUPPORTED, // xor
    detail_LOAD_STORE, // exchange
    detail_CMP_XCHG, // compare_exchange_weak
    detail_CMP_XCHG, // compare_exchange_strong
    ptr2x, 64, int64_t
)

#endif
|
||||
|
||||
// Clean up every detail_* helper macro so nothing leaks out of this header
// into including translation units.
#undef detail_THREAD_FENCE
#undef detail_LOAD
#undef detail_LOAD_NOT_CONST
#undef detail_STORE
#undef detail_LOAD_STORE
#undef detail_CMP_XCHG
#undef detail_NOT_SUPPORTED

#undef detail_LOAD_BITS_8
#undef detail_LOAD_BITS_16
#undef detail_LOAD_BITS_32
#undef detail_LOAD_BITS_64
#undef detail_STORE_BITS_8
#undef detail_STORE_BITS_16
#undef detail_STORE_BITS_32
#undef detail_STORE_BITS_64

#undef detail_intrinsic_exchange
#undef detail_intrinsic_fetch_add
#undef detail_intrinsic_fetch_and
#undef detail_intrinsic_fetch_or
#undef detail_intrinsic_fetch_xor

#undef detail_relaxed_relaxed
#undef detail_relaxed_acquire
#undef detail_relaxed_release
#undef detail_relaxed_acq_rel
#undef detail_relaxed_seq_cst
#undef detail_acquire_relaxed
#undef detail_acquire_acquire
#undef detail_acquire_release
#undef detail_acquire_acq_rel
#undef detail_acquire_seq_cst
#undef detail_release_relaxed
#undef detail_release_acquire
#undef detail_release_release
#undef detail_release_acq_rel
#undef detail_release_seq_cst
#undef detail_acq_rel_relaxed
#undef detail_acq_rel_acquire
#undef detail_acq_rel_release
#undef detail_acq_rel_acq_rel
#undef detail_acq_rel_seq_cst
#undef detail_seq_cst_relaxed
#undef detail_seq_cst_acquire
#undef detail_seq_cst_release
#undef detail_seq_cst_acq_rel
#undef detail_seq_cst_seq_cst

#undef detail_relaxed
#undef detail_acquire
#undef detail_release
#undef detail_acq_rel
#undef detail_seq_cst
|
||||
// ==== New vendored file (58 lines): Libraries/external/baselib/Include/C/Internal/Compiler/Baselib_Atomic_MsvcIntrinsics.h ====
|
||||
#pragma once

#include <intrin.h>

// Fallback for toolchains whose intrin.h does not define the ARM barrier
// domain constant (0xB = inner shareable, full barrier).
#ifndef _ARM_BARRIER_ISH
#define _ARM_BARRIER_ISH 0xB
#endif

// MSVC's 32-bit interlocked intrinsics operate on 'long' and carry no '32'
// width tag. These aliases give them uniformly width-suffixed names (plus
// the _nf/_acq/_rel order variants) so the atomic generator macros can
// paste <op> + <bits> + <order suffix> regardless of width.
#define _InterlockedCompareExchange32(obj, value, exp) _InterlockedCompareExchange((long*)obj, value, exp)
#define _InterlockedCompareExchange32_nf(obj, value, exp) _InterlockedCompareExchange_nf((long*)obj, value, exp)
#define _InterlockedCompareExchange32_acq(obj, value, exp) _InterlockedCompareExchange_acq((long*)obj, value, exp)
#define _InterlockedCompareExchange32_rel(obj, value, exp) _InterlockedCompareExchange_rel((long*)obj, value, exp)
#define _InterlockedExchange32(obj, value) _InterlockedExchange((long*)obj, value)
#define _InterlockedExchange32_nf(obj, value) _InterlockedExchange_nf((long*)obj, value)
#define _InterlockedExchange32_acq(obj, value) _InterlockedExchange_acq((long*)obj, value)
#define _InterlockedExchange32_rel(obj, value) _InterlockedExchange_rel((long*)obj, value)
#define _InterlockedExchangeAdd32(obj, value) _InterlockedExchangeAdd((long*)obj, value)
#define _InterlockedExchangeAdd32_nf(obj, value) _InterlockedExchangeAdd_nf((long*)obj, value)
#define _InterlockedExchangeAdd32_acq(obj, value) _InterlockedExchangeAdd_acq((long*)obj, value)
#define _InterlockedExchangeAdd32_rel(obj, value) _InterlockedExchangeAdd_rel((long*)obj, value)
#define _InterlockedAnd32(obj, value) _InterlockedAnd((long*)obj, value)
#define _InterlockedAnd32_nf(obj, value) _InterlockedAnd_nf((long*)obj, value)
#define _InterlockedAnd32_acq(obj, value) _InterlockedAnd_acq((long*)obj, value)
#define _InterlockedAnd32_rel(obj, value) _InterlockedAnd_rel((long*)obj, value)
#define _InterlockedOr32(obj, value) _InterlockedOr((long*)obj, value)
#define _InterlockedOr32_nf(obj, value) _InterlockedOr_nf((long*)obj, value)
#define _InterlockedOr32_acq(obj, value) _InterlockedOr_acq((long*)obj, value)
#define _InterlockedOr32_rel(obj, value) _InterlockedOr_rel((long*)obj, value)
#define _InterlockedXor32(obj, value) _InterlockedXor((long*)obj, value)
#define _InterlockedXor32_nf(obj, value) _InterlockedXor_nf((long*)obj, value)
#define _InterlockedXor32_acq(obj, value) _InterlockedXor_acq((long*)obj, value)
#define _InterlockedXor32_rel(obj, value) _InterlockedXor_rel((long*)obj, value)

// Use cmp_xchg on x86 to emulate 64 bit exchange and alu ops
#if defined(_M_IX86)

#undef _InterlockedExchange64
#undef _InterlockedExchangeAdd64
#undef _InterlockedOr64
#undef _InterlockedAnd64
#undef _InterlockedXor64

// Defines a 64-bit read-modify-write op as an _InterlockedCompareExchange64
// loop: 'p1' is the last observed value, the CAS publishes the new value
// (__VA_ARGS__, an expression over p1 and value), and the previous value is
// returned, matching the interlocked intrinsic contract. The initial '*obj'
// read is a plain (non-atomic) seed; the CAS validates it.
#define detail_CAS_OP(_name, ...) \
static __forceinline __int64 _name(__int64* obj, __int64 value) \
{ \
    __int64 p1, p2 = *obj; \
    do { p1 = p2; p2 = _InterlockedCompareExchange64(obj, (__VA_ARGS__), p1); } while (p1 != p2); \
    return p1; \
}

detail_CAS_OP(_InterlockedExchange64, value);
detail_CAS_OP(_InterlockedExchangeAdd64, p1 + value);
detail_CAS_OP(_InterlockedOr64, p1 | value);
detail_CAS_OP(_InterlockedAnd64, p1 & value);
detail_CAS_OP(_InterlockedXor64, p1 ^ value);
#undef detail_CAS_OP

#endif
|
||||
// ==== End of vendored file ====