[add] first

2023-10-08 10:24:48 +08:00
commit b1ae0510a9
1048 changed files with 3254361 additions and 0 deletions


@@ -0,0 +1,152 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#include "../Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_CappedSemaphore_SemaphoreBased.inl.h"
#endif
// Space out to different cache lines.
// The idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and should only touch wakeups.
// The only exception to that rule is when we hit a timeout.
typedef struct Baselib_CappedSemaphore
{
int32_t wakeups;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
int32_t count;
const int32_t cap;
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) * 2]; // Having cap on the same cacheline is fine since it is a constant.
} Baselib_CappedSemaphore;
BASELIB_STATIC_ASSERT(sizeof(Baselib_CappedSemaphore) == PLATFORM_CACHE_LINE_SIZE * 2, "Baselib_CappedSemaphore (Futex) size should match 2*cacheline size (128bytes)");
BASELIB_STATIC_ASSERT(offsetof(Baselib_CappedSemaphore, wakeups) ==
(offsetof(Baselib_CappedSemaphore, count) - PLATFORM_CACHE_LINE_SIZE), "Baselib_CappedSemaphore (futex) wakeups and count shouldn't share cacheline");
BASELIB_INLINE_API Baselib_CappedSemaphore Baselib_CappedSemaphore_Create(const uint16_t cap)
{
Baselib_CappedSemaphore semaphore = { 0, {0}, 0, cap, {0} };
return semaphore;
}
BASELIB_INLINE_API bool Detail_Baselib_CappedSemaphore_ConsumeWakeup(Baselib_CappedSemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryAcquire(Baselib_CappedSemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Acquire(Baselib_CappedSemaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
while (!Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
}
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryTimedAcquire(Baselib_CappedSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
if (Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Detail_Baselib_CappedSemaphore_ConsumeWakeup(semaphore));
return true;
}
BASELIB_INLINE_API uint16_t Baselib_CappedSemaphore_Release(Baselib_CappedSemaphore* semaphore, const uint16_t _count)
{
int32_t count = _count;
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
do
{
if (previousCount == semaphore->cap)
return 0;
if (previousCount + count > semaphore->cap)
count = semaphore->cap - previousCount;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->count, &previousCount, previousCount + count));
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
}
return count;
}
BASELIB_INLINE_API uint32_t Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(Baselib_CappedSemaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Free(Baselib_CappedSemaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
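// --- Illustrative usage sketch (not from the original sources) ---
// A minimal example of how the capped semaphore above could gate access to a fixed-size
// resource pool. All MyPool_* names are hypothetical and only serve to show the API flow.
typedef struct MyPool
{
    Baselib_CappedSemaphore available; // capped at the pool size
} MyPool;

static inline MyPool MyPool_Create(uint16_t poolSize)
{
    MyPool pool = { Baselib_CappedSemaphore_Create(poolSize) };
    // Publish all tokens; Release never raises count above the cap.
    Baselib_CappedSemaphore_Release(&pool.available, poolSize);
    return pool;
}

static inline void MyPool_Borrow(MyPool* pool)
{
    Baselib_CappedSemaphore_Acquire(&pool->available); // blocks until a token is available
}

static inline void MyPool_Return(MyPool* pool)
{
    Baselib_CappedSemaphore_Release(&pool->available, 1); // releases beyond the cap are dropped
}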


@@ -0,0 +1,122 @@
#pragma once
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_Thread.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_CappedSemaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef struct Baselib_CappedSemaphore
{
Baselib_SystemSemaphore_Handle handle;
int32_t count;
const int32_t cap;
// Make the capped semaphore take a full cache line so that, if the user cache-line aligns the semaphore,
// LL/SC operations on count will not spuriously fail.
char _cachelineSpacer[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) * 2 - sizeof(Baselib_SystemSemaphore_Handle)];
char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_CappedSemaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_CappedSemaphore, count) + PLATFORM_CACHE_LINE_SIZE - sizeof(Baselib_SystemSemaphore_Handle)) ==
offsetof(Baselib_CappedSemaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
BASELIB_INLINE_API Baselib_CappedSemaphore Baselib_CappedSemaphore_Create(uint16_t cap)
{
Baselib_CappedSemaphore semaphore = {{0}, 0, cap, {0}, {0}};
semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
return semaphore;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Acquire(Baselib_CappedSemaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryAcquire(Baselib_CappedSemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_CappedSemaphore_TryTimedAcquire(Baselib_CappedSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
return true;
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
return true;
}
BASELIB_INLINE_API uint16_t Baselib_CappedSemaphore_Release(Baselib_CappedSemaphore* semaphore, const uint16_t _count)
{
int32_t count = _count;
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
do
{
if (previousCount == semaphore->cap)
return 0;
if (previousCount + count > semaphore->cap)
count = semaphore->cap - previousCount;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->count, &previousCount, previousCount + count));
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
}
return count;
}
BASELIB_INLINE_API uint32_t Baselib_CappedSemaphore_ResetAndReleaseWaitingThreads(Baselib_CappedSemaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_CappedSemaphore_Free(Baselib_CappedSemaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}


@@ -0,0 +1,7 @@
#pragma once
#include "../Baselib_StaticAssert.h"
#define BASELIB_ENUM_ENSURE_ABI_COMPATIBILITY(_enumType) \
BASELIB_STATIC_ASSERT(sizeof(_enumType) == 4, \
"Baselib assumes that sizeof any enum type is exactly 4 bytes, there might be ABI compatibility problems if violated");


@@ -0,0 +1,198 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_EventSemaphore_SemaphoreBased.inl.h"
#endif
typedef struct Baselib_EventSemaphore
{
int32_t state;
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_EventSemaphore;
BASELIB_STATIC_ASSERT(sizeof(Baselib_EventSemaphore) == PLATFORM_CACHE_LINE_SIZE, "Baselib_EventSemaphore size should match cacheline size (64bytes)");
// The futex based event semaphore is in one of *three* states:
// * ResetNoWaitingThreads: EventSemaphore blocks threads, but there aren't any blocked yet
// * Reset: EventSemaphore blocks threads and there are some already
// * Set: EventSemaphore is not blocking any acquiring threads
//
// The ResetNoWaitingThreads state is an optimization that allows us to avoid the (comparatively) costly futex notification syscalls.
//
// In addition, there is a generation counter baked into the state variable in order to prevent lock stealing.
// -> Any change in the state during acquire (other than going from ResetNoWaitingThreads to Reset) means that the thread can continue
// (since in this case either it was set on the current generation or the generation was changed which implies an earlier release operation)
//
// Allowed state transitions:
// ResetNoWaitingThreads-Gen(X) -> Reset-Gen(X) == Acquire/TryTimedAcquire if no thread was waiting already
// ResetNoWaitingThreads-Gen(X) -> Set-Gen(X) == Set but no thread was waiting
// Reset-Gen(X) -> Set-Gen(X+1) == Set if threads were waiting
// Set-Gen(X) -> ResetNoWaitingThreads-Gen(X) == Reset/ResetAndReleaseWaitingThreads
// Reset-Gen(X) -> ResetNoWaitingThreads-Gen(X+1) == ResetAndReleaseWaitingThreads if threads were waiting
//
// Note how any state transition from Reset requires increasing the generation counter.
enum
{
//Detail_Baselib_EventSemaphore_ResetNoWaitingThreads = 0,
Detail_Baselib_EventSemaphore_Set = (uint32_t)1 << 30,
Detail_Baselib_EventSemaphore_Reset = (uint32_t)2 << 30,
Detail_Baselib_EventSemaphore_GenMask = ~((uint32_t)(1 | 2) << 30)
};
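// --- Illustrative note (not from the original sources) ---
// The 32-bit state packs a 30-bit generation counter with the two flag bits above.
// With a hypothetical generation value of 5:
//   ResetNoWaitingThreads-Gen(5) == 5
//   Reset-Gen(5)                 == Detail_Baselib_EventSemaphore_Reset | 5
//   Set-Gen(5)                   == Detail_Baselib_EventSemaphore_Set   | 5
// A small (made-up) helper mirroring Detail_Baselib_EventSemaphore_Generation, but for the flag bits:
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_ExampleFlagBits(int32_t state)
{
    return (uint32_t)state & ~(uint32_t)Detail_Baselib_EventSemaphore_GenMask; // 0, Set or Reset
}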
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_Generation(int32_t state)
{
return state & Detail_Baselib_EventSemaphore_GenMask;
}
// If Detail_Baselib_EventSemaphore_ResetNoWaitingThreads is set, sets Detail_Baselib_EventSemaphore_Reset flag.
// Returns last known state of the semaphore.
// Does nothing if state changed while this function runs (that includes generation changes while attempting to set the ResetState!)
static FORCE_INLINE uint32_t Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(Baselib_EventSemaphore* semaphore)
{
int32_t state = Baselib_atomic_load_32_acquire(&semaphore->state);
const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
while (state == resetNoWaitingThreadsState)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->state, &state, resetState))
return resetState;
}
return state;
}
BASELIB_INLINE_API Baselib_EventSemaphore Baselib_EventSemaphore_Create(void)
{
const Baselib_EventSemaphore semaphore = { 0, {0} };
return semaphore;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryAcquire(Baselib_EventSemaphore* semaphore)
{
const int32_t state = Baselib_atomic_load_32_acquire(&semaphore->state);
return state & Detail_Baselib_EventSemaphore_Set ? true : false;
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Acquire(Baselib_EventSemaphore* semaphore)
{
const int32_t state = Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(semaphore);
if (state & Detail_Baselib_EventSemaphore_Set)
return;
do
{
// State is now in Detail_Baselib_EventSemaphore_Reset-Gen(X).
Baselib_SystemFutex_Wait(&semaphore->state, state, UINT32_MAX);
// If the state has changed in any way, it is now in either of
// Set-Gen(X), Set-Gen(X+n), ResetNoWaitingThreads-Gen(X+n) or Reset-Gen(X+n) (with n>0).
if (state != Baselib_atomic_load_32_relaxed(&semaphore->state))
return;
}
while (true);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryTimedAcquire(Baselib_EventSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t state = Detail_Baselib_EventSemaphore_TransitionFrom_ResetNoWaitingThreadsState_To_ResetState(semaphore);
if (state & Detail_Baselib_EventSemaphore_Set)
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
// State is now in Detail_Baselib_EventSemaphore_Reset-Gen(X).
Baselib_SystemFutex_Wait(&semaphore->state, state, timeLeft);
// If the state has changed in any way, it is now in either of
// Set-Gen(X), Set-Gen(X+n), ResetNoWaitingThreads-Gen(X+n) or Reset-Gen(X+n) (with n>0).
if (state != Baselib_atomic_load_32_relaxed(&semaphore->state))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// The EventSemaphore now looks like there are still threads waiting even though there *might* be none!
// This is not an issue, however, since it merely means that Set/ResetAndReleaseWaitingThreads will do a potentially redundant futex notification.
return false;
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Reset(Baselib_EventSemaphore* semaphore)
{
int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
while (state == setState)
{
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, resetNoWaitingThreadsState))
return;
}
Baselib_atomic_thread_fence_release();
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Set(Baselib_EventSemaphore* semaphore)
{
int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
// If there is no thread waiting on the semaphore, there is no need to wake & increase the generation count.
// Just set it to Set if it isn't already.
while (state == resetNoWaitingThreadsState)
{
const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, setState))
return;
}
// If this is not the case however, we do exactly that, increase the generation & wake all threads.
while (state == resetState)
{
const int32_t nextGenSetState = Detail_Baselib_EventSemaphore_Generation(state + 1) | Detail_Baselib_EventSemaphore_Set;
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, nextGenSetState))
{
Baselib_SystemFutex_Notify(&semaphore->state, UINT32_MAX, Baselib_WakeupFallbackStrategy_All);
return;
}
}
// EventSemaphore was already in set state.
Baselib_atomic_thread_fence_release();
}
BASELIB_INLINE_API void Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(Baselib_EventSemaphore* semaphore)
{
// Note that doing a Baselib_EventSemaphore_Set & Baselib_EventSemaphore_Reset has the same observable effects, just slightly slower.
int32_t state = Baselib_atomic_load_32_relaxed(&semaphore->state);
const int32_t setState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Set;
const int32_t resetState = Detail_Baselib_EventSemaphore_Generation(state) | Detail_Baselib_EventSemaphore_Reset;
// If there is no thread waiting on the semaphore, there is no need to wake & increase the generation count.
// Just set it to ResetNoWaitingThreads if it isn't already.
while (state == setState)
{
const int32_t resetNoWaitingThreadsState = Detail_Baselib_EventSemaphore_Generation(state);
if (Baselib_atomic_compare_exchange_weak_32_release_relaxed(&semaphore->state, &state, resetNoWaitingThreadsState))
return;
}
// If this is not the case however, we do exactly that, increase the generation & wake all threads.
while (state == resetState)
{
const int32_t nextGenPendingResetState = Detail_Baselib_EventSemaphore_Generation(state + 1);
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->state, &state, nextGenPendingResetState))
{
Baselib_SystemFutex_Notify(&semaphore->state, UINT32_MAX, Baselib_WakeupFallbackStrategy_All);
return;
}
}
// EventSemaphore was already in the ResetNoWaitingThreads state.
Baselib_atomic_thread_fence_release();
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Free(Baselib_EventSemaphore* semaphore)
{
}
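// --- Illustrative usage sketch (not from the original sources) ---
// A hypothetical "wait for initialization" gate built on the event semaphore above; the
// MyInitGate_* names are made up. Worker threads block in WaitForInit until another thread
// calls SignalInitDone, after which all current and future waiters pass straight through.
typedef struct MyInitGate
{
    Baselib_EventSemaphore ready;
} MyInitGate;

static inline MyInitGate MyInitGate_Create(void)
{
    MyInitGate gate = { Baselib_EventSemaphore_Create() }; // starts in the blocking (reset) state
    return gate;
}

static inline void MyInitGate_WaitForInit(MyInitGate* gate)
{
    Baselib_EventSemaphore_Acquire(&gate->ready); // returns immediately once the gate is set
}

static inline void MyInitGate_SignalInitDone(MyInitGate* gate)
{
    Baselib_EventSemaphore_Set(&gate->ready); // wakes all waiting threads
}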


@@ -0,0 +1,211 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_StaticAssert.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_EventSemaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef union BASELIB_ALIGN_AS (8) Detail_Baselib_EventSemaphore_State
{
struct
{
// Can be changed without checking for changes in numWaitingForSetInProgress (use 32bit cmpex)
int32_t numWaitingForSetAndStateFlags;
// Typically not changed without checking numWaitingForSetAndStateFlags (use 64bit cmpex)
int32_t numWaitingForSetInProgress;
} parts;
int64_t stateInt64;
} Detail_Baselib_EventSemaphore_State;
enum
{
// If this flag is set, threads are still waking up from a previous Set or ResetAndReleaseWaitingThreads call.
// While this is set, any thread entering an Acquire method (that doesn't see Detail_Baselib_EventSemaphore_SetFlag)
// will wait until it is cleared before proceeding with normal operations.
Detail_Baselib_EventSemaphore_SetInProgressFlag = (uint32_t)1 << 30,
// If this flag is set, threads acquiring the semaphore succeed immediately.
Detail_Baselib_EventSemaphore_SetFlag = (uint32_t)2 << 30,
Detail_Baselib_EventSemaphore_NumWaitingForSetMask = ~((uint32_t)(1 | 2) << 30)
};
typedef struct Baselib_EventSemaphore
{
Detail_Baselib_EventSemaphore_State state;
Baselib_SystemSemaphore_Handle setSemaphore;
Baselib_SystemSemaphore_Handle setInProgressSemaphore;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - 2 * sizeof(Baselib_SystemSemaphore_Handle) - sizeof(Detail_Baselib_EventSemaphore_State)];
char _systemSemaphoreDataSemaphore[Baselib_SystemSemaphore_PlatformSize];
char _cachelineSpacer1[PLATFORM_CACHE_LINE_SIZE - Baselib_SystemSemaphore_PlatformSize];
char _systemSemaphoreDataInProgressSemaphore[Baselib_SystemSemaphore_PlatformSize];
} Baselib_EventSemaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_EventSemaphore, state) + PLATFORM_CACHE_LINE_SIZE) ==
offsetof(Baselib_EventSemaphore, _systemSemaphoreDataSemaphore), "state and _systemSemaphoreDataSemaphore must not share cacheline");
BASELIB_STATIC_ASSERT((offsetof(Baselib_EventSemaphore, _systemSemaphoreDataSemaphore) + PLATFORM_CACHE_LINE_SIZE) ==
offsetof(Baselib_EventSemaphore, _systemSemaphoreDataInProgressSemaphore), "_systemSemaphoreDataSemaphore and _systemSemaphoreDataInProgressSemaphore must not share cacheline");
// How (Timed)Acquire works for the SemaphoreBased EventSemaphore:
//
// If there is a set pending (Detail_Baselib_EventSemaphore_SetInProgressFlag is set),
// it means that not all threads from the previous wakeup call (either via Set or ResetAndReleaseWaitingThreads) have been woken up.
// If we just continued, we might steal the wakeup tokens of those threads! So instead we wait until they are done.
//
// This is different from the FutexBased version; however, there is no way for a user to distinguish that from
// a "regular (but lengthy)" preemption at the start of the function.
// Meaning that we don't care how often the semaphore got set and reset in the meantime!
//
//
// Invariants:
//
// Allowed flag state transitions:
// 0 -> Set | SetInProgress
// Set | SetInProgress <-> Set
// Set | SetInProgress <-> SetInProgress
// Set -> 0
// SetInProgress -> 0
//
// Additionally:
// * numWaitingForSetInProgress can only grow if SetInProgress is set.
// * numWaitingForSet can only grow if Set is set
#ifdef __cplusplus
BASELIB_C_INTERFACE
{
#endif
BASELIB_API void Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet(int32_t initialNumWaitingForSetAndStateFlags, Baselib_EventSemaphore* semaphore);
COMPILER_WARN_UNUSED_RESULT
BASELIB_API bool Detail_Baselib_EventSemaphore_SemaphoreBased_TryTimedAcquireNonSet(int32_t initialNumWaitingForSetAndStateFlags, Baselib_EventSemaphore* semaphore, uint32_t timeoutInMilliseconds);
#ifdef __cplusplus
} // BASELIB_C_INTERFACE
#endif
static FORCE_INLINE bool Detail_Baselib_EventSemaphore_IsSet(int32_t numWaitingForSetAndStateFlags)
{
return (numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_SetFlag) ? true : false;
}
static FORCE_INLINE bool Detail_Baselib_EventSemaphore_IsSetInProgress(int32_t numWaitingForSetAndStateFlags)
{
return (numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_SetInProgressFlag) ? true : false;
}
static FORCE_INLINE int32_t Detail_Baselib_EventSemaphore_GetWaitingForSetCount(int32_t numWaitingForSetAndStateFlags)
{
return numWaitingForSetAndStateFlags & Detail_Baselib_EventSemaphore_NumWaitingForSetMask;
}
// Changes WaitingForSet count without affecting state flags
static FORCE_INLINE int32_t Detail_Baselib_EventSemaphore_SetWaitingForSetCount(int32_t currentNumWaitingForSetAndStateFlags, int32_t newNumWaitingForSet)
{
return newNumWaitingForSet | (currentNumWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_NumWaitingForSetMask));
}
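// --- Illustrative note (not from the original sources) ---
// Example of the packing handled by the helpers above, with made-up values. The combination is
// chosen purely to show the bit layout, not to suggest a particular runtime state.
static inline void Detail_Baselib_EventSemaphore_ExamplePacking(void)
{
    const int32_t packed = 7 | Detail_Baselib_EventSemaphore_SetFlag | Detail_Baselib_EventSemaphore_SetInProgressFlag;
    BaselibAssert(Detail_Baselib_EventSemaphore_GetWaitingForSetCount(packed) == 7);
    BaselibAssert(Detail_Baselib_EventSemaphore_IsSet(packed));
    BaselibAssert(Detail_Baselib_EventSemaphore_IsSetInProgress(packed));
}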
BASELIB_INLINE_API Baselib_EventSemaphore Baselib_EventSemaphore_Create(void)
{
Baselib_EventSemaphore semaphore = {{{0, 0}}, {0}, {0}, {0}, {0}, {0}, {0}};
semaphore.setSemaphore = Baselib_SystemSemaphore_CreateInplace(semaphore._systemSemaphoreDataSemaphore);
semaphore.setInProgressSemaphore = Baselib_SystemSemaphore_CreateInplace(semaphore._systemSemaphoreDataInProgressSemaphore);
return semaphore;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryAcquire(Baselib_EventSemaphore* semaphore)
{
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
return Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags);
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Acquire(Baselib_EventSemaphore* semaphore)
{
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
if (!Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags))
Detail_Baselib_EventSemaphore_SemaphoreBased_AcquireNonSet(numWaitingForSetAndStateFlags, semaphore);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_EventSemaphore_TryTimedAcquire(Baselib_EventSemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_acquire(&semaphore->state.parts.numWaitingForSetAndStateFlags);
if (!Detail_Baselib_EventSemaphore_IsSet(numWaitingForSetAndStateFlags))
return Detail_Baselib_EventSemaphore_SemaphoreBased_TryTimedAcquireNonSet(numWaitingForSetAndStateFlags, semaphore, timeoutInMilliseconds);
return true;
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Reset(Baselib_EventSemaphore* semaphore)
{
int32_t resetNumWaitingForSetAndStateFlags;
int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
do
{
resetNumWaitingForSetAndStateFlags = numWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_SetFlag);
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
&semaphore->state.parts.numWaitingForSetAndStateFlags,
&numWaitingForSetAndStateFlags,
resetNumWaitingForSetAndStateFlags));
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Set(Baselib_EventSemaphore* semaphore)
{
int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
int32_t numWaitingForSetAndStateFlagsSet, numWaitingForSet;
do
{
numWaitingForSetAndStateFlagsSet = numWaitingForSetAndStateFlags | Detail_Baselib_EventSemaphore_SetFlag;
numWaitingForSet = Detail_Baselib_EventSemaphore_GetWaitingForSetCount(numWaitingForSetAndStateFlags);
BaselibAssert(numWaitingForSet >= 0, "There needs to be always a non-negative amount of threads waiting for Set");
if (numWaitingForSet)
numWaitingForSetAndStateFlagsSet |= Detail_Baselib_EventSemaphore_SetInProgressFlag;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
&semaphore->state.parts.numWaitingForSetAndStateFlags,
&numWaitingForSetAndStateFlags,
numWaitingForSetAndStateFlagsSet));
if (!Detail_Baselib_EventSemaphore_IsSetInProgress(numWaitingForSetAndStateFlags) && numWaitingForSet)
Baselib_SystemSemaphore_Release(semaphore->setSemaphore, numWaitingForSet);
}
BASELIB_INLINE_API void Baselib_EventSemaphore_ResetAndReleaseWaitingThreads(Baselib_EventSemaphore* semaphore)
{
// Note that doing a Baselib_EventSemaphore_Set & Baselib_EventSemaphore_Reset has the same observable effects, just slightly slower.
int32_t numWaitingForSetAndStateFlags = Baselib_atomic_load_32_relaxed(&semaphore->state.parts.numWaitingForSetAndStateFlags);
int32_t resetNumWaitingForSetAndStateFlags, numWaitingForSet;
do
{
resetNumWaitingForSetAndStateFlags = numWaitingForSetAndStateFlags & (~Detail_Baselib_EventSemaphore_SetFlag);
numWaitingForSet = Detail_Baselib_EventSemaphore_GetWaitingForSetCount(numWaitingForSetAndStateFlags);
BaselibAssert(numWaitingForSet >= 0, "There needs to be always a non-negative amount of threads waiting for Set");
if (numWaitingForSet)
resetNumWaitingForSetAndStateFlags |= Detail_Baselib_EventSemaphore_SetInProgressFlag;
}
while (!Baselib_atomic_compare_exchange_weak_32_release_relaxed(
&semaphore->state.parts.numWaitingForSetAndStateFlags,
&numWaitingForSetAndStateFlags,
resetNumWaitingForSetAndStateFlags));
if (!Detail_Baselib_EventSemaphore_IsSetInProgress(numWaitingForSetAndStateFlags) && numWaitingForSet)
Baselib_SystemSemaphore_Release(semaphore->setSemaphore, numWaitingForSet);
}
BASELIB_INLINE_API void Baselib_EventSemaphore_Free(Baselib_EventSemaphore* semaphore)
{
if (!semaphore)
return;
Baselib_SystemSemaphore_FreeInplace(semaphore->setSemaphore);
Baselib_SystemSemaphore_FreeInplace(semaphore->setInProgressSemaphore);
}


@@ -0,0 +1,150 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#include "../Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_HighCapacitySemaphore_SemaphoreBased.inl.h"
#endif
// Space out to different cache lines.
// The idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and should only touch wakeups.
// The only exception to that rule is when we hit a timeout.
typedef struct Baselib_HighCapacitySemaphore
{
int32_t wakeups;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t)];
int64_t count;
char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t)];
} Baselib_HighCapacitySemaphore;
BASELIB_INLINE_API Baselib_HighCapacitySemaphore Baselib_HighCapacitySemaphore_Create(void)
{
Baselib_HighCapacitySemaphore semaphore = {0, {0}, 0, {0}};
return semaphore;
}
BASELIB_INLINE_API bool Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(Baselib_HighCapacitySemaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryAcquire(Baselib_HighCapacitySemaphore* semaphore)
{
int64_t previousCount = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_64_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Acquire(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
while (!Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore))
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
}
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryTimedAcquire(Baselib_HighCapacitySemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
if (Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int64_t count = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Detail_Baselib_HighCapacitySemaphore_ConsumeWakeup(semaphore));
return true;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Release(Baselib_HighCapacitySemaphore* semaphore, const uint32_t _count)
{
const int64_t count = _count;
int64_t previousCount = Baselib_atomic_fetch_add_64_release(&semaphore->count, count);
// This should only be possible if millions of threads enter this function simultaneously posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", (int32_t)previousCount, (int32_t)count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int64_t waitingThreads = -previousCount;
const int64_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
BaselibAssert(threadsToWakeup <= INT32_MAX);
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, (int32_t)threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, (int32_t)threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
return;
}
// overflow protection
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2
// this way we won't have to do clamping on every iteration
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_HighCapacitySemaphore_MaxGuaranteedCount * 2))
{
const int64_t maxCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint64_t Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t count = Baselib_atomic_exchange_64_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int64_t threadsToWakeup = -count;
BaselibAssert(threadsToWakeup <= INT32_MAX);
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, (int32_t)threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, (int32_t)threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Free(Baselib_HighCapacitySemaphore* semaphore)
{
if (!semaphore)
return;
const int64_t count = Baselib_atomic_load_64_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
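// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical fan-out: a producer publishes a whole batch of work items with one Release call
// and each worker consumes one token per item. The 64-bit count is what makes very large
// outstanding batches safe (up to Baselib_HighCapacitySemaphore_MaxGuaranteedCount).
static inline void MyWorkQueue_PublishBatch(Baselib_HighCapacitySemaphore* workAvailable, uint32_t itemCount)
{
    Baselib_HighCapacitySemaphore_Release(workAvailable, itemCount);
}

static inline void MyWorkQueue_WaitForItem(Baselib_HighCapacitySemaphore* workAvailable)
{
    Baselib_HighCapacitySemaphore_Acquire(workAvailable); // blocks until a token is available
}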


@@ -0,0 +1,126 @@
#pragma once
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_Thread.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_HighCapacitySemaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef struct Baselib_HighCapacitySemaphore
{
int64_t count;
Baselib_SystemSemaphore_Handle handle;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int64_t) - sizeof(Baselib_SystemSemaphore_Handle)];
char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_HighCapacitySemaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_HighCapacitySemaphore, count) + PLATFORM_CACHE_LINE_SIZE) ==
offsetof(Baselib_HighCapacitySemaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
BASELIB_INLINE_API Baselib_HighCapacitySemaphore Baselib_HighCapacitySemaphore_Create(void)
{
Baselib_HighCapacitySemaphore semaphore = {0, {0}, {0}, {0}};
semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
return semaphore;
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryAcquire(Baselib_HighCapacitySemaphore* semaphore)
{
int64_t previousCount = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_64_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Acquire(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
BASELIB_INLINE_API bool Baselib_HighCapacitySemaphore_TryTimedAcquire(Baselib_HighCapacitySemaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int64_t previousCount = Baselib_atomic_fetch_add_64_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
return true;
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int64_t count = Baselib_atomic_load_64_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
return true;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Release(Baselib_HighCapacitySemaphore* semaphore, const uint32_t _count)
{
const int64_t count = _count;
int64_t previousCount = Baselib_atomic_fetch_add_64_release(&semaphore->count, count);
// This should only be possible if millions of threads enter this function simultaneously posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", (int32_t)previousCount, (int32_t)count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int64_t waitingThreads = -previousCount;
const int64_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
BaselibAssert(threadsToWakeup <= (int64_t)UINT32_MAX);
Baselib_SystemSemaphore_Release(semaphore->handle, (uint32_t)threadsToWakeup);
return;
}
// overflow protection
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2
// this way we won't have to do clamping on every iteration
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_HighCapacitySemaphore_MaxGuaranteedCount * 2))
{
const int64_t maxCount = Baselib_HighCapacitySemaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_64_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint64_t Baselib_HighCapacitySemaphore_ResetAndReleaseWaitingThreads(Baselib_HighCapacitySemaphore* semaphore)
{
const int64_t count = Baselib_atomic_exchange_64_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int64_t threadsToWakeup = -count;
BaselibAssert(threadsToWakeup <= (int64_t)UINT32_MAX);
Baselib_SystemSemaphore_Release(semaphore->handle, (uint32_t)threadsToWakeup);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_HighCapacitySemaphore_Free(Baselib_HighCapacitySemaphore* semaphore)
{
if (!semaphore)
return;
const int64_t count = Baselib_atomic_load_64_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}


@@ -0,0 +1,92 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
enum Detail_Baselib_Lock_State
{
Detail_Baselib_Lock_UNLOCKED = 0,
Detail_Baselib_Lock_LOCKED = 1,
Detail_Baselib_Lock_CONTENDED = 2,
};
typedef struct Baselib_Lock
{
int32_t state;
char _cachelineSpacer[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_Lock;
BASELIB_INLINE_API Baselib_Lock Baselib_Lock_Create(void)
{
Baselib_Lock lock = {Detail_Baselib_Lock_UNLOCKED, {0}};
return lock;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryAcquire(Baselib_Lock* lock)
{
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
do
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, Detail_Baselib_Lock_LOCKED))
return true;
}
while (previousState == Detail_Baselib_Lock_UNLOCKED);
return false;
}
BASELIB_INLINE_API void Baselib_Lock_Acquire(Baselib_Lock* lock)
{
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
do
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, previousState + 1))
break;
}
while (previousState != Detail_Baselib_Lock_CONTENDED);
while (OPTIMIZER_LIKELY(previousState != Detail_Baselib_Lock_UNLOCKED))
{
Baselib_SystemFutex_Wait(&lock->state, Detail_Baselib_Lock_CONTENDED, UINT32_MAX);
previousState = Baselib_atomic_exchange_32_relaxed(&lock->state, Detail_Baselib_Lock_CONTENDED);
}
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryTimedAcquire(Baselib_Lock* lock, const uint32_t timeoutInMilliseconds)
{
int32_t previousState = Detail_Baselib_Lock_UNLOCKED;
do
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&lock->state, &previousState, previousState + 1))
break;
}
while (previousState != Detail_Baselib_Lock_CONTENDED);
if (OPTIMIZER_LIKELY(previousState == Detail_Baselib_Lock_UNLOCKED))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&lock->state, Detail_Baselib_Lock_CONTENDED, timeLeft);
const int32_t previousState = Baselib_atomic_exchange_32_relaxed(&lock->state, Detail_Baselib_Lock_CONTENDED);
if (previousState == Detail_Baselib_Lock_UNLOCKED)
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
return false;
}
BASELIB_INLINE_API void Baselib_Lock_Release(Baselib_Lock* lock)
{
const int32_t previousState = Baselib_atomic_exchange_32_release(&lock->state, Detail_Baselib_Lock_UNLOCKED);
if (previousState == Detail_Baselib_Lock_CONTENDED)
Baselib_SystemFutex_Notify(&lock->state, 1, Baselib_WakeupFallbackStrategy_OneByOne);
}
BASELIB_INLINE_API void Baselib_Lock_Free(Baselib_Lock* lock)
{
}
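// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical critical section built on the futex-based lock above; MyCounter is made up.
typedef struct MyCounter
{
    Baselib_Lock lock;
    int64_t value;
} MyCounter;

static inline MyCounter MyCounter_Create(void)
{
    MyCounter counter = { Baselib_Lock_Create(), 0 };
    return counter;
}

static inline void MyCounter_Increment(MyCounter* counter)
{
    Baselib_Lock_Acquire(&counter->lock);
    counter->value++;                       // protected region
    Baselib_Lock_Release(&counter->lock);
}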


@@ -0,0 +1,46 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_CappedSemaphore.h"
typedef struct Baselib_Lock
{
Baselib_CappedSemaphore semaphore;
} Baselib_Lock;
BASELIB_INLINE_API Baselib_Lock Baselib_Lock_Create(void)
{
Baselib_Lock lock = { Baselib_CappedSemaphore_Create(1) };
uint16_t submittedTokens = Baselib_CappedSemaphore_Release(&lock.semaphore, 1);
BaselibAssert(submittedTokens == 1, "CappedSemaphore was unable to accept our token");
return lock;
}
BASELIB_INLINE_API void Baselib_Lock_Acquire(Baselib_Lock* lock)
{
Baselib_CappedSemaphore_Acquire(&lock->semaphore);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryAcquire(Baselib_Lock* lock)
{
return Baselib_CappedSemaphore_TryAcquire(&lock->semaphore);
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_Lock_TryTimedAcquire(Baselib_Lock* lock, const uint32_t timeoutInMilliseconds)
{
return Baselib_CappedSemaphore_TryTimedAcquire(&lock->semaphore, timeoutInMilliseconds);
}
BASELIB_INLINE_API void Baselib_Lock_Release(Baselib_Lock* lock)
{
Baselib_CappedSemaphore_Release(&lock->semaphore, 1);
}
BASELIB_INLINE_API void Baselib_Lock_Free(Baselib_Lock* lock)
{
if (!lock)
return;
Baselib_CappedSemaphore_Free(&lock->semaphore);
}


@@ -0,0 +1,93 @@
#pragma once
#include "../Baselib_Lock.h"
#include "../Baselib_StaticAssert.h"
#include "../Baselib_Alignment.h"
#include "../Baselib_Thread.h"
typedef struct Baselib_ReentrantLock
{
Baselib_Lock lock;
Baselib_Thread_Id owner;
int32_t count;
} Baselib_ReentrantLock;
BASELIB_STATIC_ASSERT((BASELIB_ALIGN_OF(Baselib_ReentrantLock) + offsetof(Baselib_ReentrantLock, owner)) % sizeof(Baselib_Thread_Id) == 0, "Baselib_ReentrantLock::owner is not aligned for atomic use");
BASELIB_STATIC_ASSERT((BASELIB_ALIGN_OF(Baselib_ReentrantLock) + offsetof(Baselib_ReentrantLock, count)) % sizeof(int32_t) == 0, "Baselib_ReentrantLock::count is not aligned for atomic use");
BASELIB_INLINE_API Baselib_ReentrantLock Baselib_ReentrantLock_Create(void)
{
Baselib_ReentrantLock lock = {Baselib_Lock_Create(), Baselib_Thread_InvalidId, 0};
return lock;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_ReentrantLock_TryAcquire(Baselib_ReentrantLock* lock)
{
const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
{
if (!Baselib_Lock_TryAcquire(&lock->lock))
return false;
lock->owner = currentThreadId;
lock->count = 1;
return true;
}
lock->count++;
return true;
}
BASELIB_INLINE_API void Baselib_ReentrantLock_Acquire(Baselib_ReentrantLock* lock)
{
const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
{
Baselib_Lock_Acquire(&lock->lock);
lock->owner = currentThreadId;
lock->count = 1;
return;
}
lock->count++;
}
COMPILER_WARN_UNUSED_RESULT
BASELIB_INLINE_API bool Baselib_ReentrantLock_TryTimedAcquire(Baselib_ReentrantLock* lock, const uint32_t timeoutInMilliseconds)
{
const Baselib_Thread_Id currentThreadId = Baselib_Thread_GetCurrentThreadId();
const Baselib_Thread_Id lockOwner = Baselib_atomic_load_ptr_relaxed(&lock->owner);
if (OPTIMIZER_LIKELY(currentThreadId != lockOwner))
{
if (!Baselib_Lock_TryTimedAcquire(&lock->lock, timeoutInMilliseconds))
return false;
lock->owner = currentThreadId;
lock->count = 1;
return true;
}
lock->count++;
return true;
}
BASELIB_INLINE_API void Baselib_ReentrantLock_Release(Baselib_ReentrantLock* lock)
{
if (lock->count > 0)
{
BaselibAssert(Baselib_atomic_load_ptr_relaxed(&lock->owner) == Baselib_Thread_GetCurrentThreadId(), "A recursive lock can only be unlocked by the locking thread");
if (OPTIMIZER_LIKELY(lock->count == 1))
{
lock->owner = Baselib_Thread_InvalidId;
lock->count = 0;
Baselib_Lock_Release(&lock->lock);
return;
}
lock->count--;
}
}
BASELIB_INLINE_API void Baselib_ReentrantLock_Free(Baselib_ReentrantLock* lock)
{
if (!lock)
return;
Baselib_Lock_Free(&lock->lock);
}
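// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical example of re-entrancy; the MyObject_* helpers are made up. The inner helper
// re-acquires the lock on the same thread, and only the outermost Release actually unlocks.
static inline void MyObject_UpdateInner(Baselib_ReentrantLock* lock)
{
    Baselib_ReentrantLock_Acquire(lock);    // same owner: count goes 1 -> 2, no blocking
    // ... touch shared state ...
    Baselib_ReentrantLock_Release(lock);    // count goes 2 -> 1, lock is still held
}

static inline void MyObject_UpdateOuter(Baselib_ReentrantLock* lock)
{
    Baselib_ReentrantLock_Acquire(lock);    // count 0 -> 1, underlying lock acquired
    MyObject_UpdateInner(lock);
    Baselib_ReentrantLock_Release(lock);    // count 1 -> 0, underlying lock released
}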


@@ -0,0 +1,152 @@
#pragma once
#include "../Baselib_CountdownTimer.h"
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemFutex.h"
#include "../Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us Baselib_Semaphore_SemaphoreBased.inl.h"
#endif
// Space out to different cache lines.
// The idea here is that threads waking up from sleep should not have to
// access the cache line where count is stored, and should only touch wakeups.
// The only exception to that rule is when we hit a timeout.
typedef struct Baselib_Semaphore
{
int32_t wakeups;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
int32_t count;
char _cachelineSpacer2[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t)];
} Baselib_Semaphore;
BASELIB_STATIC_ASSERT(sizeof(Baselib_Semaphore) == PLATFORM_CACHE_LINE_SIZE * 2, "Baselib_Semaphore (Futex) size should match 2*cacheline size (128bytes)");
BASELIB_STATIC_ASSERT(offsetof(Baselib_Semaphore, wakeups) ==
(offsetof(Baselib_Semaphore, count) - PLATFORM_CACHE_LINE_SIZE), "Baselib_Semaphore (Futex) wakeups and count shouldn't share cacheline");
BASELIB_INLINE_API Baselib_Semaphore Baselib_Semaphore_Create(void)
{
Baselib_Semaphore semaphore = {0, {0}, 0, {0}};
return semaphore;
}
BASELIB_INLINE_API bool Detail_Baselib_Semaphore_ConsumeWakeup(Baselib_Semaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->wakeups);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->wakeups, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryAcquire(Baselib_Semaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_Semaphore_Acquire(Baselib_Semaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
while (!Detail_Baselib_Semaphore_ConsumeWakeup(semaphore))
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, UINT32_MAX);
}
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryTimedAcquire(Baselib_Semaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
uint32_t timeLeft = timeoutInMilliseconds;
const Baselib_CountdownTimer timer = Baselib_CountdownTimer_StartMs(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&semaphore->wakeups, 0, timeLeft);
if (Detail_Baselib_Semaphore_ConsumeWakeup(semaphore))
return true;
timeLeft = Baselib_CountdownTimer_GetTimeLeftInMilliseconds(timer);
}
while (timeLeft);
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Detail_Baselib_Semaphore_ConsumeWakeup(semaphore));
return true;
}
BASELIB_INLINE_API void Baselib_Semaphore_Release(Baselib_Semaphore* semaphore, const uint16_t _count)
{
const int32_t count = _count;
int32_t previousCount = Baselib_atomic_fetch_add_32_release(&semaphore->count, count);
// This should only be possible if thousands of threads enter this function simultaneously posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", previousCount, count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
return;
}
// overflow protection
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2
// this way we won't have to do clamping on every iteration
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_Semaphore_MaxGuaranteedCount * 2))
{
const int32_t maxCount = Baselib_Semaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint32_t Baselib_Semaphore_ResetAndReleaseWaitingThreads(Baselib_Semaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_atomic_fetch_add_32_relaxed(&semaphore->wakeups, threadsToWakeup);
Baselib_SystemFutex_Notify(&semaphore->wakeups, threadsToWakeup, Baselib_WakeupFallbackStrategy_All);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_Semaphore_Free(Baselib_Semaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
}
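// --- Illustrative usage sketch (not from the original sources) ---
// Hypothetical producer/consumer handoff; the MyChannel_* names are made up. The producer
// releases one token per item, and a consumer blocks (with a timeout) until an item exists.
static inline void MyChannel_Post(Baselib_Semaphore* itemsAvailable)
{
    Baselib_Semaphore_Release(itemsAvailable, 1);
}

static inline bool MyChannel_WaitForItem(Baselib_Semaphore* itemsAvailable, uint32_t timeoutInMilliseconds)
{
    return Baselib_Semaphore_TryTimedAcquire(itemsAvailable, timeoutInMilliseconds);
}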


@@ -0,0 +1,126 @@
#pragma once
#include "../Baselib_Atomic_TypeSafe.h"
#include "../Baselib_SystemSemaphore.h"
#include "../Baselib_Thread.h"
#if PLATFORM_FUTEX_NATIVE_SUPPORT
#error "It's highly recommended to use Baselib_Semaphore_FutexBased.inl.h on platforms which has native semaphore support"
#endif
typedef struct Baselib_Semaphore
{
Baselib_SystemSemaphore_Handle handle;
int32_t count;
char _cachelineSpacer0[PLATFORM_CACHE_LINE_SIZE - sizeof(int32_t) - sizeof(Baselib_SystemSemaphore_Handle)];
char _systemSemaphoreData[Baselib_SystemSemaphore_PlatformSize];
} Baselib_Semaphore;
BASELIB_STATIC_ASSERT((offsetof(Baselib_Semaphore, count) + PLATFORM_CACHE_LINE_SIZE - sizeof(Baselib_SystemSemaphore_Handle)) ==
offsetof(Baselib_Semaphore, _systemSemaphoreData), "count and internalData must not share cacheline");
BASELIB_INLINE_API Baselib_Semaphore Baselib_Semaphore_Create(void)
{
Baselib_Semaphore semaphore = {{0}, 0, {0}, {0}};
semaphore.handle = Baselib_SystemSemaphore_CreateInplace(&semaphore._systemSemaphoreData);
return semaphore;
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryAcquire(Baselib_Semaphore* semaphore)
{
int32_t previousCount = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (previousCount > 0)
{
if (Baselib_atomic_compare_exchange_weak_32_acquire_relaxed(&semaphore->count, &previousCount, previousCount - 1))
return true;
}
return false;
}
BASELIB_INLINE_API void Baselib_Semaphore_Acquire(Baselib_Semaphore* semaphore)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return;
Baselib_SystemSemaphore_Acquire(semaphore->handle);
}
BASELIB_INLINE_API bool Baselib_Semaphore_TryTimedAcquire(Baselib_Semaphore* semaphore, const uint32_t timeoutInMilliseconds)
{
const int32_t previousCount = Baselib_atomic_fetch_add_32_acquire(&semaphore->count, -1);
if (OPTIMIZER_LIKELY(previousCount > 0))
return true;
if (OPTIMIZER_LIKELY(Baselib_SystemSemaphore_TryTimedAcquire(semaphore->handle, timeoutInMilliseconds)))
return true;
// When a timeout occurs we need to make sure we do one of the following:
// increase count by one from a negative value (give our acquired token back) or consume a wakeup.
//
// If count is not negative, we are likely racing with a release operation, in which case we
// may end up with a successful acquire operation.
do
{
int32_t count = Baselib_atomic_load_32_relaxed(&semaphore->count);
while (count < 0)
{
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &count, count + 1))
return false;
}
// Likely a race, yield to give the release operation room to complete.
// This includes a full memory barrier, which ensures that there is no reordering between changing/reading count and wakeup consumption.
Baselib_Thread_YieldExecution();
}
while (!Baselib_SystemSemaphore_TryAcquire(semaphore->handle));
return true;
}
BASELIB_INLINE_API void Baselib_Semaphore_Release(Baselib_Semaphore* semaphore, const uint16_t _count)
{
const int32_t count = _count;
int32_t previousCount = Baselib_atomic_fetch_add_32_release(&semaphore->count, count);
// This should only be possible if thousands of threads enter this function simultaneously, each posting with a high count.
// See overflow protection below.
BaselibAssert(previousCount <= (previousCount + count), "Semaphore count overflow (current: %d, added: %d).", previousCount, count);
if (OPTIMIZER_UNLIKELY(previousCount < 0))
{
const int32_t waitingThreads = -previousCount;
const int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
return;
}
// Overflow protection:
// we clamp count to MaxGuaranteedCount when count exceeds MaxGuaranteedCount * 2,
// so that clamping doesn't have to be attempted on every release.
while (OPTIMIZER_UNLIKELY(previousCount > Baselib_Semaphore_MaxGuaranteedCount * 2))
{
const int32_t maxCount = Baselib_Semaphore_MaxGuaranteedCount;
if (Baselib_atomic_compare_exchange_weak_32_relaxed_relaxed(&semaphore->count, &previousCount, maxCount))
return;
}
}
BASELIB_INLINE_API uint32_t Baselib_Semaphore_ResetAndReleaseWaitingThreads(Baselib_Semaphore* semaphore)
{
const int32_t count = Baselib_atomic_exchange_32_release(&semaphore->count, 0);
if (OPTIMIZER_LIKELY(count >= 0))
return 0;
const int32_t threadsToWakeup = -count;
Baselib_SystemSemaphore_Release(semaphore->handle, threadsToWakeup);
return threadsToWakeup;
}
BASELIB_INLINE_API void Baselib_Semaphore_Free(Baselib_Semaphore* semaphore)
{
if (!semaphore)
return;
const int32_t count = Baselib_atomic_load_32_seq_cst(&semaphore->count);
BaselibAssert(count >= 0, "Destruction is not allowed when there are still threads waiting on the semaphore.");
Baselib_SystemSemaphore_FreeInplace(semaphore->handle);
}
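// Illustrative usage sketch, assuming only the Baselib_Semaphore API defined above; the function
// name is hypothetical, and the release side is shown inline here although in real code it would
// run on a different thread than the acquiring side.
static inline bool Detail_Example_Semaphore_TimedConsume(void)
{
    Baselib_Semaphore sem = Baselib_Semaphore_Create();

    // Producer side: hand out one token, waking at most one waiting thread.
    Baselib_Semaphore_Release(&sem, 1);

    // Consumer side: wait up to 100 milliseconds for a token; false is returned on timeout.
    const bool gotToken = Baselib_Semaphore_TryTimedAcquire(&sem, 100);

    // Destruction requires that no thread is still waiting on the semaphore.
    Baselib_Semaphore_Free(&sem);
    return gotToken;
}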


@@ -0,0 +1,194 @@
#pragma once
#include "../../../C/Baselib_Atomic.h"
#include "../../../C/Baselib_Atomic_Macros.h"
#include "Baselib_Atomic_Gcc_Apple_LLVM_Patch.h"
#if COMPILER_GCC && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 7))
#pragma message "GNUC: " PP_STRINGIZE(__GNUC__) " GNUC_MINOR: " PP_STRINGIZE(__GNUC_MINOR__)
#error "GCC is too old and/or missing compatible atomic built-in functions" PP_STRINGIZE(__GNUC__)
#endif
#define detail_intrinsic_relaxed __ATOMIC_RELAXED
#define detail_intrinsic_acquire __ATOMIC_ACQUIRE
#define detail_intrinsic_release __ATOMIC_RELEASE
#define detail_intrinsic_acq_rel __ATOMIC_ACQ_REL
#define detail_intrinsic_seq_cst __ATOMIC_SEQ_CST
// Patch gcc and clang intrinsics to achieve a sequentially consistent barrier.
// As of writing (Clang 9, GCC 9), neither produces a seq_cst barrier for load-store operations.
// To fix this we switch load-store operations to acquire-release and issue a full barrier afterwards.
#define detail_ldst_intrinsic_relaxed detail_intrinsic_relaxed
#define detail_ldst_intrinsic_acquire detail_intrinsic_acquire
#define detail_ldst_intrinsic_release detail_intrinsic_release
#define detail_ldst_intrinsic_acq_rel detail_intrinsic_acq_rel
#define detail_ldst_intrinsic_seq_cst detail_intrinsic_seq_cst
#if defined(__aarch64__)
#undef detail_ldst_intrinsic_seq_cst
#define detail_ldst_intrinsic_seq_cst __ATOMIC_ACQ_REL
#define detail_AARCH64_SEQCST_PATCH_BARRIER_relaxed
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acquire
#define detail_AARCH64_SEQCST_PATCH_BARRIER_release
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acq_rel
#define detail_AARCH64_SEQCST_PATCH_BARRIER_seq_cst __extension__({__atomic_thread_fence (__ATOMIC_SEQ_CST); });
#else
#define detail_AARCH64_SEQCST_PATCH_BARRIER_relaxed
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acquire
#define detail_AARCH64_SEQCST_PATCH_BARRIER_release
#define detail_AARCH64_SEQCST_PATCH_BARRIER_acq_rel
#define detail_AARCH64_SEQCST_PATCH_BARRIER_seq_cst
#endif
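// Net effect of the patch above (illustrative): on AArch64 a seq_cst load-store operation such as
// Baselib_atomic_fetch_add_32_seq_cst_v is emitted as an acq_rel RMW followed by a full
// __atomic_thread_fence(__ATOMIC_SEQ_CST); on every other architecture the intrinsic keeps its
// native seq_cst ordering and the extra barrier macro expands to nothing.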
#define detail_THREAD_FENCE(order, ...) \
static FORCE_INLINE void Baselib_atomic_thread_fence_##order(void) \
{ \
__extension__({__atomic_thread_fence (detail_intrinsic_##order); }); \
}
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
__extension__({ __atomic_load((int_type*)obj, (int_type*)result, detail_intrinsic_##order); }); \
}
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
__extension__({ __atomic_load((int_type*)obj, (int_type*)result, detail_intrinsic_##order); }); \
}
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
__extension__({ __atomic_store((int_type*)obj, (int_type*)value, detail_intrinsic_##order); }); \
}
#define detail_ALU(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
*(int_type*)result = __extension__({ __atomic_##op((int_type*)obj, *(int_type*)value, detail_ldst_intrinsic_##order); });\
detail_AARCH64_SEQCST_PATCH_BARRIER_##order; \
}
#define detail_XCHG(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
__extension__({ __atomic_exchange((int_type*)obj, (int_type*)value, (int_type*)result, detail_ldst_intrinsic_##order); });\
detail_AARCH64_SEQCST_PATCH_BARRIER_##order; \
}
#define detail_CMP_XCHG_WEAK(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value); \
bool result = __extension__({ __atomic_compare_exchange( \
(int_type*)obj, \
(int_type*)expected, \
(int_type*)value, \
1, \
detail_ldst_intrinsic_##order1, \
detail_ldst_intrinsic_##order2); \
}); \
if (result) { detail_AARCH64_SEQCST_PATCH_BARRIER_##order1; } \
else { detail_AARCH64_SEQCST_PATCH_BARRIER_##order2;} \
return result; \
}
#define detail_CMP_XCHG_STRONG(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value); \
bool result = __extension__ ({ __atomic_compare_exchange( \
(int_type*)obj, \
(int_type*)expected, \
(int_type*)value, \
0, \
detail_ldst_intrinsic_##order1, \
detail_ldst_intrinsic_##order2); \
}); \
if (result) { detail_AARCH64_SEQCST_PATCH_BARRIER_##order1; } \
else { detail_AARCH64_SEQCST_PATCH_BARRIER_##order2;} \
return result; \
}
#define detail_NOT_SUPPORTED(...)
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
detail_THREAD_FENCE
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_TYPE(
detail_LOAD, // load
detail_STORE, // store
detail_ALU, // add
detail_ALU, // and
detail_ALU, // or
detail_ALU, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
)
#if PLATFORM_ARCH_64
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
128, 128, __int128 // type information
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
ptr2x, 128, __int128 // type information
)
#else
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG, // exchange
detail_CMP_XCHG_WEAK, // compare_exchange_weak
detail_CMP_XCHG_STRONG, // compare_exchange_strong
ptr2x, 64, int64_t // type information
)
#endif
#undef detail_intrinsic_relaxed
#undef detail_intrinsic_acquire
#undef detail_intrinsic_release
#undef detail_intrinsic_acq_rel
#undef detail_intrinsic_seq_cst
#undef detail_THREAD_FENCE
#undef detail_LOAD
#undef detail_LOAD_NOT_CONST
#undef detail_STORE
#undef detail_ALU
#undef detail_XCHG
#undef detail_CMP_XCHG_WEAK
#undef detail_CMP_XCHG_STRONG
#undef detail_NOT_SUPPORTED
#include "Baselib_Atomic_Gcc_Apple_LLVM_Patch_PostInclude.h"


@@ -0,0 +1,142 @@
#pragma once
#if PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
//
// Patch for Apple LLVM version 8.x.x (clang-800.0.38 - clang-900.0.37) intrinsic 128-bit __atomic_compare_exchange implementation (debug, using opt level -O0).
// Note that this patch is only in effect on tvOS/iOS AArch64 debug builds with Apple LLVM version 8.x.x. Arm32 has been verified to work without the patch.
//
// Problem:
// For the above builds, the __atomic_compare_exchange asm expansion uses SUBS/SBCS to compare the pair of "obj" and "expected" values.
// SUBS/SBCS does not provide sufficient NZCV flags for comparing two 64-bit values.
// The result is an erroneous comparison of "obj" and "expected". Some examples:
//
// -- fails (lo != lo && hi == hi)
// obj.lo = 5;
// obj.hi = 10;
// expected.lo = 3;
// expected.hi = 10;
//
// -- works (expected.lo < 0)
// obj.lo = 5;
// obj.hi = 20;
// expected.lo = -3;
// expected.hi = 20;
//
// -- fails (obj.lo < 0 && hi == hi)
// obj.lo = -5;
// obj.hi = 30;
// expected.lo = 3;
// expected.hi = 30;
//
// -- fails (expected.lo < 0 && obj.hi+1 == expected.hi)
// obj.lo = 5;
// obj.hi = 3;
// expected.lo = -3;
// expected.hi = 2;
//
// Solution: Inline assembly replacement of __atomic_compare_exchange using the same approach as in release mode
//
// Note: This patch should be removed in its entirety once we require Apple LLVM version 9 (clang-900.0.37) or higher for building.
//
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ld_instr, st_instr, barrier_instr) \
{ \
register bool result asm ("w0"); \
asm volatile \
( \
" ldp x12, x13, [%x4] ; load expected \n" \
" ldp x10, x11, [%x5] ; load value \n" \
" " #ld_instr " x9, x8, [%x3] ; load obj \n" \
" eor x13, x8, x13 ; compare to expected \n" \
" eor x12, x9, x12 \n" \
" orr x12, x12, x13 \n" \
" cbnz x12, 0f ; not equal = no store \n" \
" " #st_instr " w12, x10, x11, [%x0] ; try store \n" \
" cbnz w12, 1f \n" \
" orr w0, wzr, #0x1 ; success, result in w0 \n" \
" b 2f \n" \
"0: ; no store \n" \
" clrex \n" \
"1: ; failed store \n" \
" movz w0, #0 \n" \
"2: ; store expected, fail \n" \
" tbnz w0, #0, 3f \n" \
" stp x9, x8, [%x1] \n" \
"3: \n" \
" " #barrier_instr " \n" \
\
: "+r" (obj), "+r" (expected), "=r" (result) \
: "r" (obj), "r" (expected), "r" (value) \
: "x8", "x9", "x10", "x11", "x12", "x13", "cc", "memory"); \
\
return result != 0; \
}
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_relaxed_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_release_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_seq_cst(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_WEAK_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ld_instr, st_instr, barrier_instr) \
{ \
register bool result asm ("w0"); \
asm volatile \
( \
" ldp x10, x11, [%x4] ; load expected \n" \
" ldp x12, x13, [%x5] ; load value \n" \
"0: \n" \
" " #ld_instr " x9, x8, [%x3] ; load obj (ldxp/ldaxp) \n" \
" eor x14, x8, x11 ; compare to expected \n" \
" eor x15, x9, x10 \n" \
" orr x14, x15, x14 \n" \
" cbnz x14, 1f ; not equal = no store \n" \
" " #st_instr " w14, x12, x13, [%x0] ; try store (stxp/stlxp) \n" \
" cbnz w14, 0b ; retry or store result in w0 \n" \
" orr w0, wzr, #0x1 \n" \
" b 2f \n" \
"1: ; no store \n" \
" movz w0, #0 \n" \
" clrex \n" \
"2: ; store expected on fail \n" \
" tbnz w0, #0, 3f \n" \
" stp x9, x8, [%x1] \n" \
"3: \n" \
" " #barrier_instr " \n" \
\
: "+r" (obj), "+r" (expected), "=r" (result) \
: "r" (obj), "r" (expected), "r" (value) \
: "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "cc", "memory"); \
\
return result != 0; \
}
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_relaxed_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_release_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, )
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_relaxed(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_acquire(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_seq_cst(obj, expected, value) detail_APPLE_LLVM_CMP_XCHG_STRONG_128(obj, expected, value, ldaxp, stlxp, dmb ish)
#define detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value) \
if(sizeof(int_type) == 16) \
detail_APPLE_LLVM_CMP_XCHG_WEAK_128_##order1##_##order2(obj, expected, value);
#define detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(order1, order2, int_type, obj, expected, value) \
if(sizeof(int_type) == 16) \
detail_APPLE_LLVM_CMP_XCHG_STRONG_128_##order1##_##order2(obj, expected, value);
#else // PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
#define detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH(...)
#define detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH(...)
#endif
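// Illustrative sanity-check sketch one could run in a test, assuming the 128-bit compare-exchange
// generated by the Gcc header (Baselib_atomic_compare_exchange_weak_128_seq_cst_seq_cst_v); the
// Detail_Example_* names are hypothetical. It exercises the first failing pattern listed above
// (lo differs, hi matches), which unpatched Apple LLVM 8.x debug codegen would wrongly treat as equal.
typedef union Detail_Example_Pair128
{
    __int128 storage;                 // guarantees the 16-byte size/alignment the exclusive pair ops need
    struct { uint64_t lo, hi; } u64;
} Detail_Example_Pair128;

static FORCE_INLINE bool Detail_Example_CmpXchg128_LoMismatchMustFail(void)
{
    Detail_Example_Pair128 obj, expected, desired;
    obj.u64.lo = 5;       obj.u64.hi = 10;
    expected.u64.lo = 3;  expected.u64.hi = 10;
    desired.u64.lo = 7;   desired.u64.hi = 10;

    // Must fail: obj.lo != expected.lo even though the hi halves match. On failure "expected" is
    // updated to the value observed in "obj".
    const bool exchanged = Baselib_atomic_compare_exchange_weak_128_seq_cst_seq_cst_v(&obj, &expected, &desired);
    return !exchanged && expected.u64.lo == 5 && expected.u64.hi == 10;
}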


@@ -0,0 +1,30 @@
#pragma once
#if PLATFORM_USE_APPLE_LLVM_ATOMIC_CMPXCHG_128_PATCH
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_relaxed_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acquire_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_release_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_acq_rel_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_WEAK_128_seq_cst_seq_cst
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_relaxed_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acquire_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_release_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_acq_rel_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_relaxed
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_acquire
#undef detail_APPLE_LLVM_CMP_XCHG_STRONG_128_seq_cst_seq_cst
#undef detail_APPLE_LLVM_CMP_XCHG_128_WEAK_APPLE_LLVM_PATCH
#undef detail_APPLE_LLVM_CMP_XCHG_128_STRONG_APPLE_LLVM_PATCH
#endif


@@ -0,0 +1,40 @@
#pragma once
// Arm exclusive state access break implementation
#define detail_Baselib_atomic_llsc_break() __builtin_arm_clrex()
// Arm exclusive LLSC implementation using intrinsics.
#define detail_Baselib_atomic_llsc_arm_ts(obj, expected, value, code, ll_instr, sc_instr, load_barrier, store_barrier) \
do { \
do { \
*expected = __builtin_arm_##ll_instr(obj); \
load_barrier; \
code; \
} while (__builtin_arm_##sc_instr(*value, obj)); \
store_barrier; \
} while (false)
#define detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ll_instr, sc_instr, loadbarrier, storebarrier) \
detail_Baselib_atomic_llsc_arm_ts((int_type*)((void*)obj), \
(int_type*)((void*)expected), \
(int_type*)((void*)value), \
code, ll_instr, sc_instr, loadbarrier, storebarrier)
#define detail_Baselib_atomic_llsc_relaxed_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, ,)
#if PLATFORM_ARCH_64
#define detail_Baselib_atomic_llsc_acquire_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, strex, ,)
#define detail_Baselib_atomic_llsc_relaxed_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, stlex, ,)
#define detail_Baselib_atomic_llsc_acquire_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, stlex, ,)
#define detail_Baselib_atomic_llsc_seq_cst_seq_cst_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldaex, stlex, , __builtin_arm_dmb(11) )
#else
#define detail_Baselib_atomic_llsc_acquire_relaxed_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11), )
#define detail_Baselib_atomic_llsc_relaxed_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, ,__builtin_arm_dmb(11) )
#define detail_Baselib_atomic_llsc_acquire_release_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11) , __builtin_arm_dmb(11) )
#define detail_Baselib_atomic_llsc_seq_cst_seq_cst_v(obj, expected, value, code, int_type) detail_Baselib_atomic_llsc_arm_v(obj, expected, value, code, int_type, ldrex, strex, __builtin_arm_dmb(11) , __builtin_arm_dmb(11) )
#endif
#define detail_Baselib_atomic_llsc_v(obj, expected, value, code, size, loadbarrier, storebarrier) \
detail_Baselib_atomic_llsc_##loadbarrier##_##storebarrier##_v(obj, expected, value, code, int##size##_t)
#define detail_Baselib_atomic_llsc_128_v(obj, expected, value, code, loadbarrier, storebarrier) \
detail_Baselib_atomic_llsc_##loadbarrier##_##storebarrier##_v(obj, expected, value, code, __int128)
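// Illustrative sketch, assuming an Arm target where the __builtin_arm_* exclusives used above are
// available; the Detail_Example_* name is hypothetical. It builds an acquire-release fetch-add
// directly on the LLSC helper: "code" computes the value to store from the value just loaded.
static inline int32_t Detail_Example_LlscFetchAdd32_AcqRel(int32_t* obj, int32_t add)
{
    int32_t loaded, desired;
    detail_Baselib_atomic_llsc_v(obj, &loaded, &desired, { desired = loaded + add; }, 32, acquire, release);
    return loaded; // value observed by the load-exclusive of the successful iteration
}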


@@ -0,0 +1,358 @@
#pragma once
#include "../../../C/Baselib_Atomic.h"
#include "../../../C/Baselib_Atomic_Macros.h"
#include "Baselib_Atomic_MsvcIntrinsics.h"
#define detail_relaxed_relaxed(...) __VA_ARGS__
#define detail_relaxed_acquire(...)
#define detail_relaxed_release(...)
#define detail_relaxed_acq_rel(...)
#define detail_relaxed_seq_cst(...)
#define detail_acquire_relaxed(...)
#define detail_acquire_acquire(...) __VA_ARGS__
#define detail_acquire_release(...)
#define detail_acquire_acq_rel(...)
#define detail_acquire_seq_cst(...)
#define detail_release_relaxed(...)
#define detail_release_acquire(...)
#define detail_release_release(...) __VA_ARGS__
#define detail_release_acq_rel(...)
#define detail_release_seq_cst(...)
#define detail_acq_rel_relaxed(...)
#define detail_acq_rel_acquire(...)
#define detail_acq_rel_release(...)
#define detail_acq_rel_acq_rel(...) __VA_ARGS__
#define detail_acq_rel_seq_cst(...)
#define detail_seq_cst_relaxed(...)
#define detail_seq_cst_acquire(...)
#define detail_seq_cst_release(...)
#define detail_seq_cst_acq_rel(...)
#define detail_seq_cst_seq_cst(...) __VA_ARGS__
#define detail_relaxed(memory_order, ...) detail_relaxed_##memory_order(__VA_ARGS__)
#define detail_acquire(memory_order, ...) detail_acquire_##memory_order(__VA_ARGS__)
#define detail_release(memory_order, ...) detail_release_##memory_order(__VA_ARGS__)
#define detail_acq_rel(memory_order, ...) detail_acq_rel_##memory_order(__VA_ARGS__)
#define detail_seq_cst(memory_order, ...) detail_seq_cst_##memory_order(__VA_ARGS__)
// Intel
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)
#define detail_intrinsic_relaxed
#define detail_intrinsic_acquire
#define detail_intrinsic_release
#define detail_intrinsic_acq_rel
#define detail_intrinsic_seq_cst
#if defined(_M_X64)
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
detail_acquire(order, _ReadWriteBarrier()); \
detail_release(order, _ReadWriteBarrier()); \
detail_acq_rel(order, _ReadWriteBarrier()); \
detail_seq_cst(order, __faststorefence()); \
}
#else // #defined(_M_IX86)
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
detail_acquire(order, _ReadWriteBarrier()); \
detail_release(order, _ReadWriteBarrier()); \
detail_acq_rel(order, _ReadWriteBarrier()); \
detail_seq_cst(order, _ReadWriteBarrier(); __int32 temp = 0; _InterlockedExchange32(&temp, 0); _ReadWriteBarrier()); \
}
#endif
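// Note: 32-bit x86 has no __faststorefence, so the seq_cst fence above is emulated with a locked
// exchange to a dummy stack location, bracketed by compiler barriers to keep the compiler from
// reordering around it.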
#define detail_LOAD_BITS_8(obj, result) *(__int8*)result = *(const volatile __int8*)obj
#define detail_LOAD_BITS_16(obj, result) *(__int16*)result = *(const volatile __int16*)obj
#define detail_LOAD_BITS_32(obj, result) *(__int32*)result = *(const volatile __int32*)obj
#if PLATFORM_ARCH_64
#define detail_LOAD_BITS_64(obj, result) *(__int64*)result = *(const volatile __int64*)obj
#else
// x86 32-bit load/store of a 64-bit integer.
// - With SSE2 enabled this yields (identical to __mm_store/load):
// movsd xmm0, QWORD PTR unsigned __int64 obj
// movsd QWORD PTR unsigned __int64 result, xmm0
// - Without SSE2 enabled it yields:
// fld QWORD PTR unsigned __int64 obj
// fstp QWORD PTR unsigned __int64 result
// Link comparing various implementations: https://godbolt.org/z/T3zW5M
#define detail_LOAD_BITS_64(obj, result) *(double*)result = *(const volatile double*)obj
#endif
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
detail_LOAD_BITS_##bits(obj, result); \
detail_acquire(order, _ReadWriteBarrier()); \
detail_seq_cst(order, _ReadWriteBarrier()); \
}
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
detail_LOAD_BITS_##bits(obj, result); \
detail_acquire(order, _ReadWriteBarrier()); \
detail_seq_cst(order, _ReadWriteBarrier()); \
}
#define detail_STORE_BITS_8(obj, value) *(volatile __int8*)obj = *(const __int8*)value
#define detail_STORE_BITS_16(obj, value) *(volatile __int16*)obj = *(const __int16*)value
#define detail_STORE_BITS_32(obj, value) *(volatile __int32*)obj = *(const __int32*)value
#if PLATFORM_ARCH_64
#define detail_STORE_BITS_64(obj, value) *(volatile __int64*)obj = *(const __int64*)value
#else
#define detail_STORE_BITS_64(obj, value) *(volatile double*)obj = *(double*)value
#endif
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
detail_relaxed(order, detail_STORE_BITS_##bits(obj, value)); \
detail_release(order, detail_STORE_BITS_##bits(obj, value); _ReadWriteBarrier()); \
detail_seq_cst(order, _InterlockedExchange##bits((__int##bits*)obj, *(const __int##bits*)value)); \
}
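// Illustrative expansion, assuming the FOR_EACH macros instantiate id = 32 / bits = 32: for seq_cst
// only the detail_seq_cst branch survives, so the store becomes a locked exchange, while the relaxed
// and release variants stay plain volatile stores (release adds a trailing compiler barrier):
//
//   static FORCE_INLINE void Baselib_atomic_store_32_seq_cst_v(void* obj, const void* value)
//   {
//       _InterlockedExchange32((__int32*)obj, *(const __int32*)value);
//   }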
// ARM
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#elif defined(_M_ARM) || defined(_M_ARM64)
#define detail_intrinsic_relaxed _nf
#define detail_intrinsic_acquire _acq
#define detail_intrinsic_release _rel
#define detail_intrinsic_acq_rel
#define detail_intrinsic_seq_cst
#define detail_THREAD_FENCE(order, ...) \
static COMPILER_FORCEINLINE void Baselib_atomic_thread_fence_##order() \
{ \
detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
detail_release(order, __dmb(_ARM_BARRIER_ISH)); \
detail_acq_rel(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#define detail_LOAD(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(const void* obj, void* result) \
{ \
*(__int##bits*)result = __iso_volatile_load##bits((const __int##bits*)obj); \
detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#define detail_LOAD_NOT_CONST(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
*(__int##bits*)result = __iso_volatile_load##bits((const __int##bits*)obj); \
detail_acquire(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#define detail_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
detail_release(order, __dmb(_ARM_BARRIER_ISH)); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
__iso_volatile_store##bits((__int##bits*) obj, *(const __int##bits*)value); \
detail_seq_cst(order, __dmb(_ARM_BARRIER_ISH)); \
}
#endif
// Common
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#define detail_intrinsic_exchange _InterlockedExchange
#define detail_intrinsic_fetch_add _InterlockedExchangeAdd
#define detail_intrinsic_fetch_and _InterlockedAnd
#define detail_intrinsic_fetch_or _InterlockedOr
#define detail_intrinsic_fetch_xor _InterlockedXor
#define detail_LOAD_STORE(op, order, id , bits, int_type, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
*(__int##bits##*)result = PP_CONCAT(detail_intrinsic_##op, bits, detail_intrinsic_##order)((__int##bits##*)obj, *(const __int##bits##*)value); \
}
#define detail_CMP_XCHG(op, order1, order2, id , bits, int_type, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
__int##bits cmp = *(__int##bits##*)expected; \
__int##bits result = PP_CONCAT(_InterlockedCompareExchange, bits, detail_intrinsic_##order1)((__int##bits##*)obj, *(__int##bits##*)value, cmp); \
return result == cmp ? true : (*(__int##bits##*)expected = result, false); \
}
#define detail_NOT_SUPPORTED(...)
// Setup implementation
// ------------------------------------------------------------------------------------------------------------------------------------------------------
Baselib_Atomic_FOR_EACH_MEMORY_ORDER(
detail_THREAD_FENCE
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_MEMORY_ORDER_AND_TYPE(
detail_LOAD, // load
detail_STORE, // store
detail_LOAD_STORE, // add
detail_LOAD_STORE, // and
detail_LOAD_STORE, // or
detail_LOAD_STORE, // xor
detail_LOAD_STORE, // exchange
detail_CMP_XCHG, // compare_exchange_weak
detail_CMP_XCHG // compare_exchange_strong
)
#if PLATFORM_ARCH_64
// 128-bit implementation
// There are more efficient ways of doing 128-bit load, store and exchange on Arm64, but MSVC provides no intrinsics for them,
// nor any other way to emit the specific instructions required for atomic load, store and exchange.
// Hence we fall back to cmpxchg for all atomic ops.
// ------------------------------------------------------------------------------------------------------------------------------------------------------
#define detail_LOAD128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, void* result) \
{ \
Baselib_atomic_compare_exchange_weak_128_##order##_##order##_v((void*)obj, result, result); \
}
#define detail_STORE128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value) \
{ \
uint64_t comparand[2] = { ((volatile uint64_t*)obj)[0], ((volatile uint64_t*)obj)[1] }; \
while(!Baselib_atomic_compare_exchange_weak_128_##order##_relaxed_v(obj, comparand, value)) \
; \
}
#define detail_XCHG128(op, order, id, ...) \
static FORCE_INLINE void Baselib_atomic_##op##_##id##_##order##_v(void* obj, const void* value, void* result) \
{ \
((uint64_t*)result)[0] = ((volatile uint64_t*)obj)[0]; \
((uint64_t*)result)[1] = ((volatile uint64_t*)obj)[1]; \
while(!Baselib_atomic_compare_exchange_weak_128_##order##_relaxed_v(obj, result, value)) \
; \
}
#define detail_CMP_XCHG128(op, order1, order2, id, ...) \
static FORCE_INLINE bool Baselib_atomic_##op##_##id##_##order1##_##order2##_v(void* obj, void* expected, const void* value) \
{ \
return PP_CONCAT(_InterlockedCompareExchange128, detail_intrinsic_##order1)( \
(__int64*)obj, \
((const __int64*)value)[1], \
((const __int64*)value)[0], \
(__int64*)expected \
) == 1; \
}
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD128, // load
detail_STORE128, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG128, // exchange
detail_CMP_XCHG128, // compare_exchange_weak
detail_CMP_XCHG128, // compare_exchange_strong
128
)
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD128, // load
detail_STORE128, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_XCHG128, // exchange
detail_CMP_XCHG128, // compare_exchange_weak
detail_CMP_XCHG128, // compare_exchange_strong
ptr2x
)
#undef detail_LOAD128
#undef detail_STORE128
#undef detail_XCHG128
#undef detail_CMP_XCHG128
#else
Baselib_Atomic_FOR_EACH_ATOMIC_OP_AND_MEMORY_ORDER(
detail_LOAD_NOT_CONST, // load
detail_STORE, // store
detail_NOT_SUPPORTED, // add
detail_NOT_SUPPORTED, // and
detail_NOT_SUPPORTED, // or
detail_NOT_SUPPORTED, // xor
detail_LOAD_STORE, // exchange
detail_CMP_XCHG, // compare_exchange_weak
detail_CMP_XCHG, // compare_exchange_strong
ptr2x, 64, int64_t
)
#endif
#undef detail_THREAD_FENCE
#undef detail_LOAD
#undef detail_LOAD_NOT_CONST
#undef detail_STORE
#undef detail_LOAD_STORE
#undef detail_CMP_XCHG
#undef detail_NOT_SUPPORTED
#undef detail_LOAD_BITS_8
#undef detail_LOAD_BITS_16
#undef detail_LOAD_BITS_32
#undef detail_LOAD_BITS_64
#undef detail_STORE_BITS_8
#undef detail_STORE_BITS_16
#undef detail_STORE_BITS_32
#undef detail_STORE_BITS_64
#undef detail_intrinsic_exchange
#undef detail_intrinsic_fetch_add
#undef detail_intrinsic_fetch_and
#undef detail_intrinsic_fetch_or
#undef detail_intrinsic_fetch_xor
#undef detail_relaxed_relaxed
#undef detail_relaxed_acquire
#undef detail_relaxed_release
#undef detail_relaxed_acq_rel
#undef detail_relaxed_seq_cst
#undef detail_acquire_relaxed
#undef detail_acquire_acquire
#undef detail_acquire_release
#undef detail_acquire_acq_rel
#undef detail_acquire_seq_cst
#undef detail_release_relaxed
#undef detail_release_acquire
#undef detail_release_release
#undef detail_release_acq_rel
#undef detail_release_seq_cst
#undef detail_acq_rel_relaxed
#undef detail_acq_rel_acquire
#undef detail_acq_rel_release
#undef detail_acq_rel_acq_rel
#undef detail_acq_rel_seq_cst
#undef detail_seq_cst_relaxed
#undef detail_seq_cst_acquire
#undef detail_seq_cst_release
#undef detail_seq_cst_acq_rel
#undef detail_seq_cst_seq_cst
#undef detail_relaxed
#undef detail_acquire
#undef detail_release
#undef detail_acq_rel
#undef detail_seq_cst


@@ -0,0 +1,58 @@
#pragma once
#include <intrin.h>
#ifndef _ARM_BARRIER_ISH
#define _ARM_BARRIER_ISH 0xB
#endif
#define _InterlockedCompareExchange32(obj, value, exp) _InterlockedCompareExchange((long*)obj, value, exp)
#define _InterlockedCompareExchange32_nf(obj, value, exp) _InterlockedCompareExchange_nf((long*)obj, value, exp)
#define _InterlockedCompareExchange32_acq(obj, value, exp) _InterlockedCompareExchange_acq((long*)obj, value, exp)
#define _InterlockedCompareExchange32_rel(obj, value, exp) _InterlockedCompareExchange_rel((long*)obj, value, exp)
#define _InterlockedExchange32(obj, value) _InterlockedExchange((long*)obj, value)
#define _InterlockedExchange32_nf(obj, value) _InterlockedExchange_nf((long*)obj, value)
#define _InterlockedExchange32_acq(obj, value) _InterlockedExchange_acq((long*)obj, value)
#define _InterlockedExchange32_rel(obj, value) _InterlockedExchange_rel((long*)obj, value)
#define _InterlockedExchangeAdd32(obj, value) _InterlockedExchangeAdd((long*)obj, value)
#define _InterlockedExchangeAdd32_nf(obj, value) _InterlockedExchangeAdd_nf((long*)obj, value)
#define _InterlockedExchangeAdd32_acq(obj, value) _InterlockedExchangeAdd_acq((long*)obj, value)
#define _InterlockedExchangeAdd32_rel(obj, value) _InterlockedExchangeAdd_rel((long*)obj, value)
#define _InterlockedAnd32(obj, value) _InterlockedAnd((long*)obj, value)
#define _InterlockedAnd32_nf(obj, value) _InterlockedAnd_nf((long*)obj, value)
#define _InterlockedAnd32_acq(obj, value) _InterlockedAnd_acq((long*)obj, value)
#define _InterlockedAnd32_rel(obj, value) _InterlockedAnd_rel((long*)obj, value)
#define _InterlockedOr32(obj, value) _InterlockedOr((long*)obj, value)
#define _InterlockedOr32_nf(obj, value) _InterlockedOr_nf((long*)obj, value)
#define _InterlockedOr32_acq(obj, value) _InterlockedOr_acq((long*)obj, value)
#define _InterlockedOr32_rel(obj, value) _InterlockedOr_rel((long*)obj, value)
#define _InterlockedXor32(obj, value) _InterlockedXor((long*)obj, value)
#define _InterlockedXor32_nf(obj, value) _InterlockedXor_nf((long*)obj, value)
#define _InterlockedXor32_acq(obj, value) _InterlockedXor_acq((long*)obj, value)
#define _InterlockedXor32_rel(obj, value) _InterlockedXor_rel((long*)obj, value)
// Use cmpxchg on x86 to emulate 64-bit exchange and ALU ops
#if defined(_M_IX86)
#undef _InterlockedExchange64
#undef _InterlockedExchangeAdd64
#undef _InterlockedOr64
#undef _InterlockedAnd64
#undef _InterlockedXor64
#define detail_CAS_OP(_name, ...) \
static __forceinline __int64 _name(__int64* obj, __int64 value) \
{ \
__int64 p1, p2 = *obj; \
do { p1 = p2; p2 = _InterlockedCompareExchange64(obj, (__VA_ARGS__), p1); } while (p1 != p2); \
return p1; \
}
detail_CAS_OP(_InterlockedExchange64, value);
detail_CAS_OP(_InterlockedExchangeAdd64, p1 + value);
detail_CAS_OP(_InterlockedOr64, p1 | value);
detail_CAS_OP(_InterlockedAnd64, p1 & value);
detail_CAS_OP(_InterlockedXor64, p1 ^ value);
#undef detail_CAS_OP
#endif
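// Illustrative expansion of the block above: detail_CAS_OP(_InterlockedExchangeAdd64, p1 + value)
// defines a 64-bit fetch-add on 32-bit x86 in terms of _InterlockedCompareExchange64, roughly:
//
//   static __forceinline __int64 _InterlockedExchangeAdd64(__int64* obj, __int64 value)
//   {
//       __int64 p1, p2 = *obj;
//       do { p1 = p2; p2 = _InterlockedCompareExchange64(obj, p1 + value, p1); } while (p1 != p2);
//       return p1; // previous value, matching the semantics of the real intrinsic
//   }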