commit b1ae0510a9
2023-10-08 10:24:48 +08:00

[add] first

1048 changed files with 3254361 additions and 0 deletions


@@ -0,0 +1,16 @@
#pragma once
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace Algorithm
{
namespace detail
{
template<typename T>
static FORCE_INLINE constexpr T LogicalOrRShiftOp(T value, int shift) { return value | (value >> shift); }
}
}
}
}
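
The detail helper above, LogicalOrRShiftOp, ORs a value with a right-shifted copy of itself; chained with shift amounts 1, 2, 4, 8 and 16 it smears the highest set bit across every lower position, which is the classic building block for rounding up to a power of two. A minimal standalone sketch of how such a helper composes (plain C++ without the baselib macros; CeilPowerOfTwo32 is an illustrative name, not necessarily how baselib's own Algorithm header spells it):

#include <cstdint>
#include <cassert>

// Same idea as detail::LogicalOrRShiftOp: propagate the top set bit downwards.
constexpr uint32_t OrRShift(uint32_t value, int shift) { return value | (value >> shift); }

// Round a non-zero 32-bit value up to the next power of two (illustrative only).
constexpr uint32_t CeilPowerOfTwo32(uint32_t value)
{
    // After the chain, every bit below the highest set bit of (value - 1) is 1,
    // so adding 1 yields the next power of two.
    return OrRShift(OrRShift(OrRShift(OrRShift(OrRShift(value - 1, 1), 2), 4), 8), 16) + 1;
}

int main()
{
    assert(CeilPowerOfTwo32(1) == 1);
    assert(CeilPowerOfTwo32(3) == 4);
    assert(CeilPowerOfTwo32(4) == 4);
    assert(CeilPowerOfTwo32(1000) == 1024);
}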


@@ -0,0 +1,63 @@
#pragma once
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace Algorithm
{
inline int HighestBitNonZero(uint32_t value)
{
return 31 - __builtin_clz(value);
}
inline int HighestBitNonZero(uint64_t value)
{
#if PLATFORM_ARCH_64
return 63 - __builtin_clzll(value);
#else
return (value & 0xffffffff00000000ULL) ? (63 - __builtin_clz((uint32_t)(value >> 32))) : (31 - __builtin_clz((uint32_t)value));
#endif
}
inline int HighestBit(uint32_t value)
{
return value == 0 ? -1 : HighestBitNonZero(value);
}
inline int HighestBit(uint64_t value)
{
return value == 0 ? -1 : HighestBitNonZero(value);
}
inline int LowestBitNonZero(uint32_t value)
{
return __builtin_ctz(value);
}
inline int LowestBitNonZero(uint64_t value)
{
#if PLATFORM_ARCH_64
return __builtin_ctzll(value);
#else
return (value & 0x00000000ffffffffULL) ? __builtin_ctz((uint32_t)(value)) : (32 + __builtin_ctz((uint32_t)(value >> 32)));
#endif
}
inline int LowestBit(uint32_t value)
{
return value == 0 ? -1 : LowestBitNonZero(value);
}
inline int LowestBit(uint64_t value)
{
return value == 0 ? -1 : LowestBitNonZero(value);
}
inline int BitsInMask(uint64_t mask) { return __builtin_popcountll(mask); }
inline int BitsInMask(uint32_t mask) { return __builtin_popcount(mask); }
inline int BitsInMask(uint16_t mask) { return BitsInMask((uint32_t)mask); }
inline int BitsInMask(uint8_t mask) { return BitsInMask((uint32_t)mask); }
}
}
}
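
This is the GCC/Clang flavour of the bit-scan helpers: HighestBit/LowestBit return a 0-based bit index or -1 for zero, while the NonZero variants map straight onto __builtin_clz/__builtin_ctz, whose behaviour is undefined for a zero input. A small standalone sanity check of the expected values, written directly against the GCC/Clang builtins rather than the baselib namespace:

#include <cstdint>
#include <cassert>

int main()
{
    // Highest set bit index, counted from the least significant bit.
    assert(31 - __builtin_clz(0x80000000u) == 31);
    assert(31 - __builtin_clz(1u) == 0);

    // Lowest set bit index.
    assert(__builtin_ctz(0x8u) == 3);

    // 64-bit value with only the upper half populated: 63 - clzll agrees with
    // the 32-bit fallback path, which computes 63 - clz(upper word).
    uint64_t v = 0x0000000100000000ull;
    assert(63 - __builtin_clzll(v) == 32);

    // Population count, matching the BitsInMask overloads.
    assert(__builtin_popcountll(0xF0F0F0F0F0F0F0F0ull) == 32);
}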


@@ -0,0 +1,131 @@
#pragma once
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
#if PLATFORM_ARCH_64
#pragma intrinsic(_BitScanReverse64)
#endif
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace Algorithm
{
inline int HighestBit(uint32_t value)
{
unsigned long res;
return _BitScanReverse(&res, value) ? (int)res : -1;
}
inline int HighestBit(uint64_t value)
{
#if PLATFORM_ARCH_64
unsigned long res;
return _BitScanReverse64(&res, value) ? (int)res : -1;
#else
unsigned long lower, upper;
int lower_int = _BitScanReverse(&lower, (uint32_t)value) ? (int)lower : -1;
return _BitScanReverse(&upper, (uint32_t)(value >> 32)) ? (int)(32 + upper) : lower_int;
#endif
}
inline int HighestBitNonZero(uint32_t value)
{
unsigned long res = 0;
_BitScanReverse(&res, value);
return (int)res;
}
inline int HighestBitNonZero(uint64_t value)
{
#if PLATFORM_ARCH_64
unsigned long res = 0;
_BitScanReverse64(&res, value);
return (int)res;
#else
unsigned long lower, upper;
_BitScanReverse(&lower, (uint32_t)value);
return _BitScanReverse(&upper, (uint32_t)(value >> 32)) ? (32 + upper) : lower;
#endif
}
inline int LowestBit(uint32_t value)
{
unsigned long res;
return _BitScanForward(&res, value) ? (int)res : -1;
}
inline int LowestBit(uint64_t value)
{
#if PLATFORM_ARCH_64
unsigned long res;
return _BitScanForward64(&res, value) ? (int)res : -1;
#else
unsigned long lower, upper;
int upper_int = _BitScanForward(&upper, (uint32_t)(value >> 32)) ? (int)upper : -33;
return _BitScanForward(&lower, (uint32_t)(value)) ? (int)lower : (32 + upper_int);
#endif
}
inline int LowestBitNonZero(uint32_t value)
{
unsigned long res = 0;
_BitScanForward(&res, value);
return (int)res;
}
inline int LowestBitNonZero(uint64_t value)
{
#if PLATFORM_ARCH_64
unsigned long res = 0;
_BitScanForward64(&res, value);
return (int)res;
#else
unsigned long lower, upper;
_BitScanForward(&upper, (uint32_t)(value >> 32));
return _BitScanForward(&lower, (uint32_t)(value)) ? (int)lower : (int)(32 + upper);
#endif
}
// __popcnt/__popcnt16/__popcnt64 were introduced as part of SSE4a
// See https://en.wikipedia.org/wiki/SSE4#POPCNT_and_LZCNT
// To check this accurately we would need to query cpuid, which is itself not free.
// However, when compiling for newer hardware MSVC defines __AVX__, and AVX is a superset of SSE4, so we use that as a proxy.
// (As of writing there is no equivalent __SSE4__ define.)
#if defined(__AVX__)
#ifdef _AMD64_
inline int BitsInMask(uint64_t value) { return (int)__popcnt64(value); }
#else
inline int BitsInMask(uint64_t value) { return BitsInMask((uint32_t)value) + BitsInMask((uint32_t)(value >> 32)); }
#endif
inline int BitsInMask(uint32_t value) { return (int)__popcnt(value); }
inline int BitsInMask(uint16_t value) { return (int)__popcnt16(value); }
inline int BitsInMask(uint8_t value) { return BitsInMask((uint16_t)value); }
// Todo: Consider using VCNT instruction on arm (NEON)
#else
inline int BitsInMask(uint64_t value)
{
// From http://www-graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
value = value - ((value >> 1) & (uint64_t) ~(uint64_t)0 / 3);
value = (value & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((value >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3);
value = (value + (value >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15;
return (uint64_t)(value * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * 8;
}
inline int BitsInMask(uint32_t value)
{
// From http://www-graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
value = value - ((value >> 1) & 0x55555555);
value = (value & 0x33333333) + ((value >> 2) & 0x33333333);
return (((value + (value >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
}
inline int BitsInMask(uint16_t value) { return BitsInMask((uint32_t)value); }
inline int BitsInMask(uint8_t value) { return BitsInMask((uint32_t)value); }
#endif
}
}
}
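
The #else branch is the classic SWAR population count from the Stanford bit-twiddling page, used when POPCNT cannot be assumed: ~0/3 is 0x5555..., ~0/15*3 is 0x3333..., ~0/255*15 is 0x0F0F..., and the final multiply by ~0/255 (0x0101...) sums the per-byte counts into the top byte. A standalone cross-check of that fallback against a naive loop (plain C++, outside the baselib build):

#include <cstdint>
#include <cassert>

// Same SWAR technique as the fallback BitsInMask(uint64_t) above.
int PopCount64(uint64_t v)
{
    v = v - ((v >> 1) & ~uint64_t(0) / 3);                                 // pairs,   mask 0x5555...
    v = (v & ~uint64_t(0) / 15 * 3) + ((v >> 2) & ~uint64_t(0) / 15 * 3);  // nibbles, mask 0x3333...
    v = (v + (v >> 4)) & ~uint64_t(0) / 255 * 15;                          // bytes,   mask 0x0f0f...
    return int((v * (~uint64_t(0) / 255)) >> (sizeof(uint64_t) - 1) * 8);  // sum all bytes into the top byte
}

int NaivePopCount64(uint64_t v)
{
    int n = 0;
    for (; v; v >>= 1)
        n += int(v & 1);
    return n;
}

int main()
{
    const uint64_t samples[] = { 0, 1, 0xFF, 0x8000000000000000ull, 0x0123456789ABCDEFull, ~uint64_t(0) };
    for (uint64_t s : samples)
        assert(PopCount64(s) == NaivePopCount64(s));
}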


@@ -0,0 +1,38 @@
#pragma once
#include "../Atomic.h"
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace detail
{
struct ConditionVariableData
{
atomic<int32_t> waiters;
atomic<int32_t> wakeups;
ConditionVariableData() : waiters(0), wakeups(0) {}
inline bool HasWaiters() const
{
return waiters.load(memory_order_acquire) > 0;
}
inline bool TryConsumeWakeup()
{
int32_t previousCount = wakeups.load(memory_order_relaxed);
while (previousCount > 0)
{
if (wakeups.compare_exchange_weak(previousCount, previousCount - 1, memory_order_acquire, memory_order_relaxed))
{
return true;
}
}
return false;
}
};
}
}
}
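
TryConsumeWakeup is a decrement-if-positive loop: a wakeup token is taken only if one is available, and compare_exchange_weak reloads previousCount on failure, so the loop exits as soon as the counter is observed at zero. The same pattern sketched with std::atomic (standard atomics stand in for baselib's atomic<> wrapper):

#include <atomic>
#include <cassert>
#include <cstdint>

// Consume one token if the counter is positive; never drives it below zero.
bool TryConsumeToken(std::atomic<int32_t>& tokens)
{
    int32_t current = tokens.load(std::memory_order_relaxed);
    while (current > 0)
    {
        // On success, acquire pairs with the release in the producer's fetch_add.
        // On failure, current is refreshed and the loop re-checks it.
        if (tokens.compare_exchange_weak(current, current - 1,
                                         std::memory_order_acquire,
                                         std::memory_order_relaxed))
            return true;
    }
    return false;
}

int main()
{
    std::atomic<int32_t> tokens(2);
    assert(TryConsumeToken(tokens));
    assert(TryConsumeToken(tokens));
    assert(!TryConsumeToken(tokens));   // counter is now 0, nothing left to consume
    assert(tokens.load() == 0);
}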


@@ -0,0 +1,26 @@
#pragma once
#include "../Atomic.h"
#include "../Semaphore.h"
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace detail
{
struct ConditionVariableData
{
Semaphore semaphore;
atomic<uint32_t> waiters;
ConditionVariableData() : semaphore(), waiters(0) {}
inline bool HasWaiters() const
{
return waiters.load(memory_order_acquire) > 0;
}
};
}
}
}


@@ -0,0 +1,86 @@
#pragma once
#include "../CountdownTimer.h"
#include "../../C/Baselib_SystemFutex.h"
#include "../../C/Baselib_Thread.h"
#if !PLATFORM_FUTEX_NATIVE_SUPPORT
#error "Only use this implementation on top of a proper futex, in all other situations us ConditionVariable_SemaphoreBased.inl.h"
#endif
namespace baselib
{
BASELIB_CPP_INTERFACE
{
inline void ConditionVariable::Wait()
{
m_Data.waiters.fetch_add(1, memory_order_relaxed);
m_Lock.Release();
while (!m_Data.TryConsumeWakeup())
{
Baselib_SystemFutex_Wait(&m_Data.wakeups.obj, 0, std::numeric_limits<uint32_t>::max());
}
m_Lock.Acquire();
}
inline bool ConditionVariable::TimedWait(const timeout_ms timeoutInMilliseconds)
{
m_Data.waiters.fetch_add(1, memory_order_relaxed);
m_Lock.Release();
uint32_t timeLeft = timeoutInMilliseconds.count();
auto timer = CountdownTimer::StartNew(timeoutInMilliseconds);
do
{
Baselib_SystemFutex_Wait(&m_Data.wakeups.obj, 0, timeLeft);
if (m_Data.TryConsumeWakeup())
{
m_Lock.Acquire();
return true;
}
timeLeft = timer.GetTimeLeftInMilliseconds().count();
}
while (timeLeft);
do
{
int32_t waiters = m_Data.waiters.load(memory_order_relaxed);
while (waiters > 0)
{
if (m_Data.waiters.compare_exchange_weak(waiters, waiters - 1, memory_order_relaxed, memory_order_relaxed))
{
m_Lock.Acquire();
return false;
}
}
Baselib_Thread_YieldExecution();
}
while (!m_Data.TryConsumeWakeup());
m_Lock.Acquire();
return true;
}
inline void ConditionVariable::Notify(uint16_t count)
{
int32_t waitingThreads = m_Data.waiters.load(memory_order_acquire);
do
{
int32_t threadsToWakeup = count < waitingThreads ? count : waitingThreads;
if (threadsToWakeup == 0)
{
atomic_thread_fence(memory_order_release);
return;
}
if (m_Data.waiters.compare_exchange_weak(waitingThreads, waitingThreads - threadsToWakeup, memory_order_relaxed, memory_order_relaxed))
{
m_Data.wakeups.fetch_add(threadsToWakeup, memory_order_release);
Baselib_SystemFutex_Notify(&m_Data.wakeups.obj, threadsToWakeup, Baselib_WakeupFallbackStrategy_OneByOne);
return;
}
}
while (waitingThreads > 0);
}
}
}
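
Wait releases the associated lock before blocking on the futex and re-acquires it before returning, so callers are expected to hold the lock and re-check their predicate after every return; a consumed wakeup does not guarantee the caller's condition still holds when several waiters race. The same usage contract expressed with the standard library, as an illustration of the calling pattern rather than of the baselib API itself:

#include <condition_variable>
#include <mutex>
#include <thread>
#include <cassert>

std::mutex lock;
std::condition_variable cv;
bool ready = false;

void Consumer()
{
    std::unique_lock<std::mutex> guard(lock);
    // Always wait in a loop over the predicate: a notification can race with
    // other waiters, exactly like the waiters/wakeups accounting above.
    while (!ready)
        cv.wait(guard);
    assert(ready);
}

void Producer()
{
    {
        std::lock_guard<std::mutex> guard(lock);
        ready = true;        // publish state while holding the lock
    }
    cv.notify_one();         // then wake one waiter (the Notify(1) equivalent)
}

int main()
{
    std::thread consumer(Consumer);
    std::thread producer(Producer);
    producer.join();
    consumer.join();
}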


@@ -0,0 +1,61 @@
#pragma once
namespace baselib
{
BASELIB_CPP_INTERFACE
{
inline void ConditionVariable::Wait()
{
m_Data.waiters.fetch_add(1, memory_order_relaxed);
m_Lock.Release();
m_Data.semaphore.Acquire();
m_Lock.Acquire();
}
inline bool ConditionVariable::TimedWait(const timeout_ms timeoutInMilliseconds)
{
m_Data.waiters.fetch_add(1, memory_order_relaxed);
m_Lock.Release();
bool acquired = m_Data.semaphore.TryTimedAcquire(timeoutInMilliseconds);
if (acquired)
{
m_Lock.Acquire();
return true;
}
do
{
uint32_t waiters = m_Data.waiters.load(memory_order_relaxed);
while (waiters > 0)
{
if (m_Data.waiters.compare_exchange_weak(waiters, waiters - 1, memory_order_relaxed, memory_order_relaxed))
{
m_Lock.Acquire();
return false;
}
}
Baselib_Thread_YieldExecution();
}
while (!m_Data.semaphore.TryAcquire());
m_Lock.Acquire();
return true;
}
inline void ConditionVariable::Notify(uint16_t count)
{
uint32_t waitingThreads, threadsToWakeup;
do
{
waitingThreads = m_Data.waiters.load(memory_order_acquire);
threadsToWakeup = count < waitingThreads ? count : waitingThreads;
if (threadsToWakeup == 0)
return;
}
while (!m_Data.waiters.compare_exchange_weak(waitingThreads, waitingThreads - threadsToWakeup, memory_order_relaxed, memory_order_relaxed));
m_Data.semaphore.Release(threadsToWakeup);
}
}
}


@@ -0,0 +1,22 @@
#pragma once
#include <type_traits>
namespace baselib
{
BASELIB_CPP_INTERFACE
{
// workaround for missing std::is_trivially_copyable
// this can't be put inside compiler env due to __GLIBCXX__ not being set at that point
#if (defined(__GLIBCXX__) && __GLIBCXX__ <= 20150623) || (COMPILER_GCC && __GNUC__ < 5)
template<typename T> struct is_trivially_copyable : std::has_trivial_copy_constructor<T> {};
#else
template<typename T> struct is_trivially_copyable : std::is_trivially_copyable<T> {};
#endif
template<typename T, size_t S> struct is_trivial_of_size : std::integral_constant<bool, is_trivially_copyable<T>::value && (sizeof(T) == S)> {};
template<typename T, size_t S> struct is_integral_of_size : std::integral_constant<bool, std::is_integral<T>::value && (sizeof(T) == S)> {};
template<typename T, typename T2> struct is_of_same_signedness : std::integral_constant<bool, std::is_signed<T>::value == std::is_signed<T2>::value> {};
}
}
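
These traits exist so atomic wrappers can constrain their template parameters: is_trivial_of_size<T, S> asks "trivially copyable and exactly S bytes", is_integral_of_size is the integral analogue, and is_of_same_signedness guards mixed-sign conversions. A short sketch of the compile-time checks they enable, re-created against std:: directly since the baselib versions depend on build macros not shown here:

#include <cstddef>
#include <cstdint>
#include <type_traits>

// Re-creations of the helpers above, written against std:: directly.
template<typename T, std::size_t S>
struct is_trivial_of_size
    : std::integral_constant<bool, std::is_trivially_copyable<T>::value && sizeof(T) == S> {};

template<typename T, typename T2>
struct is_of_same_signedness
    : std::integral_constant<bool, std::is_signed<T>::value == std::is_signed<T2>::value> {};

struct Handle { void* ptr; };                                        // trivially copyable, pointer-sized
struct NotTrivial { NotTrivial(const NotTrivial&) {} void* ptr; };   // user-provided copy constructor

static_assert(is_trivial_of_size<Handle, sizeof(void*)>::value,
              "Handle fits a pointer-sized atomic");
static_assert(!is_trivial_of_size<NotTrivial, sizeof(void*)>::value,
              "a user-provided copy constructor makes the type non-trivially-copyable");
static_assert(is_of_same_signedness<int32_t, int64_t>::value, "both signed");
static_assert(!is_of_same_signedness<int32_t, uint32_t>::value, "mixed signedness");

int main() {}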


@@ -0,0 +1,129 @@
#pragma once
#include "../../C/Baselib_Memory.h"
// Internal, to enable override of default C Api implementation for unit-tests
#ifndef detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL
#define detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL 0
#endif
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace detail
{
// Default memory allocation methods
struct heap_allocator_impl
{
static constexpr auto Baselib_Memory_Allocate = ::Baselib_Memory_Allocate;
static constexpr auto Baselib_Memory_Reallocate = ::Baselib_Memory_Reallocate;
static constexpr auto Baselib_Memory_Free = ::Baselib_Memory_Free;
static constexpr auto Baselib_Memory_AlignedAllocate = ::Baselib_Memory_AlignedAllocate;
static constexpr auto Baselib_Memory_AlignedReallocate = ::Baselib_Memory_AlignedReallocate;
static constexpr auto Baselib_Memory_AlignedFree = ::Baselib_Memory_AlignedFree;
};
// Test memory allocation methods
struct heap_allocator_impl_test
{
static void* Baselib_Memory_Allocate(size_t);
static void* Baselib_Memory_Reallocate(void*, size_t);
static void Baselib_Memory_Free(void*);
static void* Baselib_Memory_AlignedAllocate(size_t, size_t);
static void* Baselib_Memory_AlignedReallocate(void*, size_t, size_t);
static void Baselib_Memory_AlignedFree(void*);
};
template<uint32_t alignment>
class heap_allocator
{
// Use test memory allocation implementation if detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL is true, otherwise Baselib_Memory_*
using BaseImpl = typename std::conditional<detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL, heap_allocator_impl_test, heap_allocator_impl>::type;
// Memory allocation functions - alignment requirements <= Baselib_Memory_MinGuaranteedAlignment
struct MinAlignedImpl
{
static void* allocate(size_t size, Baselib_ErrorState *error_state_ptr)
{
UNUSED(error_state_ptr);
return BaseImpl::Baselib_Memory_Allocate(size);
}
static void* reallocate(void* ptr, size_t old_size, size_t new_size, Baselib_ErrorState *error_state_ptr)
{
UNUSED(error_state_ptr);
UNUSED(old_size);
return BaseImpl::Baselib_Memory_Reallocate(ptr, new_size);
}
static bool deallocate(void* ptr, size_t size, Baselib_ErrorState *error_state_ptr)
{
UNUSED(error_state_ptr);
UNUSED(size);
BaseImpl::Baselib_Memory_Free(ptr);
return true;
}
};
// Aligned memory allocation functions - alignment requirements > Baselib_Memory_MinGuaranteedAlignment
struct AlignedImpl
{
static void* allocate(size_t size, Baselib_ErrorState *error_state_ptr)
{
UNUSED(error_state_ptr);
return BaseImpl::Baselib_Memory_AlignedAllocate(size, alignment);
}
static void* reallocate(void* ptr, size_t old_size, size_t new_size, Baselib_ErrorState *error_state_ptr)
{
UNUSED(error_state_ptr);
UNUSED(old_size);
return BaseImpl::Baselib_Memory_AlignedReallocate(ptr, new_size, alignment);
}
static bool deallocate(void* ptr, size_t size, Baselib_ErrorState *error_state_ptr)
{
UNUSED(error_state_ptr);
UNUSED(size);
BaseImpl::Baselib_Memory_AlignedFree(ptr);
return true;
}
};
static FORCE_INLINE constexpr size_t AlignedSize(size_t size)
{
// Widen the alignment mask to size_t so the upper bits of large sizes are not masked off.
return (size + alignment - 1) & ~(static_cast<size_t>(alignment) - 1);
}
public:
static constexpr size_t max_alignment = Baselib_Memory_MaxAlignment;
static constexpr size_t optimal_size(size_t size)
{
return AlignedSize(size);
}
// Use aligned memory allocations methods if alignment > Baselib_Memory_MinGuaranteedAlignment
using Impl = typename std::conditional<(alignment > Baselib_Memory_MinGuaranteedAlignment), AlignedImpl, MinAlignedImpl>::type;
static void* allocate(size_t size, Baselib_ErrorState* error_state_ptr)
{
return Impl::allocate(size, error_state_ptr);
}
static void* reallocate(void* ptr, size_t old_size, size_t new_size, Baselib_ErrorState* error_state_ptr)
{
return Impl::reallocate(ptr, old_size, new_size, error_state_ptr);
}
static bool deallocate(void* ptr, size_t size, Baselib_ErrorState* error_state_ptr)
{
return Impl::deallocate(ptr, size, error_state_ptr);
}
};
}
}
}
#undef detail_BASELIB_HEAP_ALLOCATOR_TEST_IMPL
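
The notable design point in heap_allocator is that the implementation is chosen entirely at compile time: std::conditional selects AlignedImpl only when the requested alignment exceeds Baselib_Memory_MinGuaranteedAlignment, so the common case pays nothing for alignment support, and optimal_size simply rounds requests up to a multiple of the alignment. A self-contained sketch of that dispatch with stand-in implementations (the Baselib_Memory_* C API is not reproduced, and kMinGuaranteedAlignment is an assumed value for illustration):

#include <cstddef>
#include <cstdio>
#include <type_traits>

constexpr std::size_t kMinGuaranteedAlignment = 16;   // stand-in for Baselib_Memory_MinGuaranteedAlignment

struct PlainImpl   { static const char* name() { return "plain heap path"; } };
struct AlignedImpl { static const char* name() { return "aligned heap path"; } };

template<std::size_t alignment>
struct heap_allocator_sketch
{
    // Compile-time dispatch, mirroring the std::conditional in the header above.
    using Impl = typename std::conditional<(alignment > kMinGuaranteedAlignment),
                                           AlignedImpl, PlainImpl>::type;

    // Round a request up to the next multiple of the (power-of-two) alignment.
    static constexpr std::size_t optimal_size(std::size_t size)
    {
        return (size + alignment - 1) & ~(alignment - 1);
    }
};

int main()
{
    static_assert(heap_allocator_sketch<8>::optimal_size(13) == 16, "13 rounds up to 16");
    static_assert(heap_allocator_sketch<64>::optimal_size(65) == 128, "65 rounds up to 128");
    std::printf("%s\n", heap_allocator_sketch<8>::Impl::name());    // plain heap path
    std::printf("%s\n", heap_allocator_sketch<64>::Impl::name());   // aligned heap path
}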


@@ -0,0 +1,95 @@
#pragma once
#include "../../C/Baselib_Memory.h"
#include "../../Cpp/Algorithm.h"
// Internal, to enable override of default C Api implementation for unit-tests
#ifndef detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL
#define detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL 0
#endif
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace detail
{
// Default memory allocation methods
struct page_allocator_impl
{
static constexpr auto Baselib_Memory_AllocatePages = ::Baselib_Memory_AllocatePages;
static constexpr auto Baselib_Memory_ReleasePages = ::Baselib_Memory_ReleasePages;
static constexpr auto Baselib_Memory_SetPageState = ::Baselib_Memory_SetPageState;
};
// Test memory allocation methods
struct page_allocator_impl_test
{
static Baselib_Memory_PageAllocation Baselib_Memory_AllocatePages(uint64_t pageSize, uint64_t pageCount, uint64_t alignmentInMultipleOfPageSize, Baselib_Memory_PageState pageState, Baselib_ErrorState* errorState);
static void Baselib_Memory_ReleasePages(Baselib_Memory_PageAllocation pageAllocation, Baselib_ErrorState* errorState);
static void Baselib_Memory_SetPageState(void* addressOfFirstPage, uint64_t pageSize, uint64_t pageCount, Baselib_Memory_PageState pageState, Baselib_ErrorState* errorState);
};
typedef enum Memory_PageState : int
{
Memory_PageState_Reserved = Baselib_Memory_PageState_Reserved,
Memory_PageState_NoAccess = Baselib_Memory_PageState_NoAccess,
Memory_PageState_ReadOnly = Baselib_Memory_PageState_ReadOnly,
Memory_PageState_ReadWrite = Baselib_Memory_PageState_ReadWrite,
Memory_PageState_ReadOnly_Executable = Baselib_Memory_PageState_ReadOnly_Executable | Baselib_Memory_PageState_ReadOnly,
Memory_PageState_ReadWrite_Executable = Baselib_Memory_PageState_ReadWrite_Executable | Baselib_Memory_PageState_ReadWrite,
} Memory_PageState;
template<uint32_t alignment>
class page_allocator
{
// Use test memory allocation implementation if detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL is true, otherwise Baselib_Memory_*
using Impl = typename std::conditional<detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL, page_allocator_impl_test, page_allocator_impl>::type;
const size_t m_PageSize;
const size_t m_PageSizeAligned;
FORCE_INLINE constexpr size_t PagedCountFromSize(size_t size) const
{
return (size + (m_PageSize - 1)) / m_PageSize;
}
FORCE_INLINE size_t DefaultPageSize() const
{
Baselib_Memory_PageSizeInfo info;
Baselib_Memory_GetPageSizeInfo(&info);
return static_cast<size_t>(info.defaultPageSize);
}
public:
page_allocator() : page_allocator(DefaultPageSize()) {}
page_allocator(size_t page_size) : m_PageSize(page_size), m_PageSizeAligned(page_size > alignment ? page_size : alignment) {}
void* allocate(size_t size, int state, Baselib_ErrorState *error_state_ptr) const
{
Baselib_Memory_PageAllocation pa = Impl::Baselib_Memory_AllocatePages(m_PageSize, PagedCountFromSize(size), m_PageSizeAligned / m_PageSize, (Baselib_Memory_PageState)state, error_state_ptr);
return pa.ptr;
}
bool deallocate(void* ptr, size_t size, Baselib_ErrorState *error_state_ptr) const
{
Impl::Baselib_Memory_ReleasePages({ptr, m_PageSize, PagedCountFromSize(size)}, error_state_ptr);
return (error_state_ptr->code == Baselib_ErrorCode_Success);
}
constexpr size_t optimal_size(size_t size) const
{
return (size + m_PageSizeAligned - 1) & ~(m_PageSizeAligned - 1);
}
bool set_page_state(void* ptr, size_t size, int state, Baselib_ErrorState *error_state_ptr) const
{
Impl::Baselib_Memory_SetPageState(ptr, m_PageSize, PagedCountFromSize(size), (Baselib_Memory_PageState)state, error_state_ptr);
return (error_state_ptr->code == Baselib_ErrorCode_Success);
}
};
}
}
}
#undef detail_BASELIB_PAGE_ALLOCATOR_TEST_IMPL
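
page_allocator works in whole pages: PagedCountFromSize rounds a byte size up to a page count, optimal_size rounds up to the aligned page size, and the alignment passed to Baselib_Memory_AllocatePages is expressed as m_PageSizeAligned / m_PageSize pages. A small sketch of just that arithmetic (the 4 KiB page size and 64 KiB alignment are illustrative values, not queried from the OS):

#include <cstddef>
#include <cassert>

// Mirrors PagedCountFromSize / optimal_size above for assumed sizes.
constexpr std::size_t kPageSize = 4096;
constexpr std::size_t kPageSizeAligned = 65536;   // e.g. a 64 KiB alignment request

constexpr std::size_t PageCountFromSize(std::size_t size)
{
    return (size + (kPageSize - 1)) / kPageSize;
}

constexpr std::size_t OptimalSize(std::size_t size)
{
    return (size + kPageSizeAligned - 1) & ~(kPageSizeAligned - 1);
}

int main()
{
    assert(PageCountFromSize(1) == 1);            // any non-zero request needs at least one page
    assert(PageCountFromSize(4096) == 1);
    assert(PageCountFromSize(4097) == 2);
    assert(OptimalSize(70000) == 131072);         // rounded up to two 64 KiB blocks
    assert(kPageSizeAligned / kPageSize == 16);   // alignment expressed in pages
}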


@@ -0,0 +1,365 @@
#pragma once
#include "../Lock.h"
#include "../mpmc_node_queue.h"
#include "../Algorithm.h"
#include <algorithm>
#include <type_traits>
#include <cstring>
namespace baselib
{
BASELIB_CPP_INTERFACE
{
namespace detail
{
template<class Allocator>
class tlsf_block_allocator
{
baselib::Lock m_CapacityLock;
ALIGNED_ATOMIC(size_t) m_Capacity;
baselib::mpmc_node_queue<baselib::mpmc_node> m_FreeBlocks;
struct Segment
{
uintptr_t data;
size_t size;
Segment *next;
} *m_Segments;
void LinkSegment(Segment* segment, const size_t block_size, size_t block_count)
{
uintptr_t nodeData = segment->data;
baselib::mpmc_node* firstNode = reinterpret_cast<baselib::mpmc_node*>(nodeData);
baselib::mpmc_node* node = firstNode;
for (size_t i = 0; i < block_count; ++i)
{
node = reinterpret_cast<baselib::mpmc_node*>(nodeData);
nodeData += block_size;
node->next.obj = reinterpret_cast<baselib::mpmc_node*>(nodeData);
}
m_FreeBlocks.push_back(firstNode, node);
}
bool ExpandCapacity(size_t size, size_t block_size, Allocator& allocator)
{
if (size == 0)
return true;
// Align to the underlying allocator alignment. The requested size must also be at least block_size.
block_size = baselib::Algorithm::CeilAligned(block_size, alignment);
size = std::max(baselib::Algorithm::CeilAligned(size, alignment), block_size);
// Take the base allocator's optimal size into account, i.e. if it is higher than the requested size, expand using the optimal size.
const size_t minSize = size + sizeof(Segment);
const size_t optimalSize = allocator.optimal_size(minSize);
const size_t segment_size = std::max(optimalSize, minSize);
const size_t block_count = size / block_size;
// Allocate one memory block that contains block data and Segment info.
uintptr_t segmentMemory = reinterpret_cast<uintptr_t>(allocator.allocate(segment_size));
if (segmentMemory == 0)
return false;
// Store data ptr and size information in segment header
Segment* segment = reinterpret_cast<Segment*>(segmentMemory + size);
segment->data = segmentMemory;
segment->size = segment_size;
// Link segment to existing segments and add capacity.
// This function runs inside a locked `m_CapacityLock`, which provides implicit acquire (lock) / release (unlock) barriers.
// The relative order of the m_Segments and m_Capacity updates is irrelevant. Calling `allocate` from other threads may already
// succeed against the newly linked blocks; that is not a problem, since if this call came from `allocate` and that thread is
// pre-empted, the expansion process simply repeats.
segment->next = m_Segments;
m_Segments = segment;
LinkSegment(segment, block_size, block_count);
baselib::atomic_fetch_add_explicit(m_Capacity, block_size * block_count, baselib::memory_order_relaxed);
return true;
}
public:
static constexpr uint32_t alignment = Allocator::alignment;
// non-copyable
tlsf_block_allocator(const tlsf_block_allocator& other) = delete;
tlsf_block_allocator& operator=(const tlsf_block_allocator& other) = delete;
// non-movable (strictly speaking not needed but listed to signal intent)
tlsf_block_allocator(tlsf_block_allocator&& other) = delete;
tlsf_block_allocator& operator=(tlsf_block_allocator&& other) = delete;
tlsf_block_allocator() : m_CapacityLock(), m_Capacity(0), m_FreeBlocks(), m_Segments(nullptr) {}
void* allocate()
{
return m_FreeBlocks.try_pop_front();
}
bool deallocate(void* ptr)
{
m_FreeBlocks.push_back(reinterpret_cast<baselib::mpmc_node*>(ptr));
return true;
}
bool deallocate(void* ptr_first, void* ptr_last)
{
m_FreeBlocks.push_back(reinterpret_cast<baselib::mpmc_node*>(ptr_first), reinterpret_cast<baselib::mpmc_node*>(ptr_last));
return true;
}
void deallocate_segments(Allocator& allocator)
{
Segment *segment = m_Segments;
while (segment)
{
Segment *nextSegment = segment->next;
allocator.deallocate(reinterpret_cast<void *>(segment->data), segment->size);
segment = nextSegment;
}
}
void reset_segments()
{
if (m_Segments)
{
m_Segments = nullptr;
m_Capacity = 0;
m_FreeBlocks.~mpmc_node_queue<baselib::mpmc_node>();
new(&m_FreeBlocks) mpmc_node_queue<baselib::mpmc_node>();
}
}
bool reserve(size_t size, size_t capacity, Allocator& allocator)
{
bool result;
m_CapacityLock.AcquireScoped([&] {
result = capacity > m_Capacity ? ExpandCapacity(capacity - m_Capacity, size, allocator) : true;
});
return result;
}
bool increase_capacity(size_t size, Allocator& allocator)
{
bool result = true;
m_CapacityLock.AcquireScoped([&] {
if (m_FreeBlocks.empty())
result = ExpandCapacity(m_Capacity == 0 ? size : m_Capacity, size, allocator);
});
return result;
}
size_t capacity() const
{
return baselib::atomic_load_explicit(m_Capacity, baselib::memory_order_relaxed);
}
static constexpr size_t optimal_size(const size_t size)
{
return baselib::Algorithm::CeilAligned(size, alignment);
}
};
template<size_t min_size, size_t max_size, size_t linear_subdivisions, class BaseAllocator>
class tlsf_allocator : private BaseAllocator
{
using BlockAllocator = detail::tlsf_block_allocator<BaseAllocator>;
public:
static constexpr uint32_t alignment = BaseAllocator::alignment;
// non-copyable
tlsf_allocator(const tlsf_allocator& other) = delete;
tlsf_allocator& operator=(const tlsf_allocator& other) = delete;
// non-movable (strictly speaking not needed but listed to signal intent)
tlsf_allocator(tlsf_allocator&& other) = delete;
tlsf_allocator& operator=(tlsf_allocator&& other) = delete;
tlsf_allocator() : m_Allocators() {}
~tlsf_allocator() { DeallocateSegmentsImpl(); }
void* try_allocate(size_t size)
{
return getAllocator(size).allocate();
}
void* allocate(size_t size)
{
BlockAllocator& allocator = getAllocator(size);
do
{
void* p;
if (OPTIMIZER_LIKELY(p = allocator.allocate()))
return p;
if (!allocator.increase_capacity(AllocatorSize(size), static_cast<BaseAllocator&>(*this)))
return nullptr;
}
while (true);
}
void* try_reallocate(void* ptr, size_t old_size, size_t new_size)
{
return ReallocateImpl<true>(ptr, old_size, new_size);
}
void* reallocate(void* ptr, size_t old_size, size_t new_size)
{
return ReallocateImpl<false>(ptr, old_size, new_size);
}
bool deallocate(void* ptr, size_t size)
{
return ptr == nullptr ? true : getAllocator(size).deallocate(ptr);
}
void deallocate_all()
{
atomic_thread_fence(memory_order_acquire);
DeallocateSegmentsImpl();
for (auto& pow2Allocators : m_Allocators)
for (auto& blockAllocator : pow2Allocators)
blockAllocator.reset_segments();
atomic_thread_fence(memory_order_release);
}
bool batch_deallocate(void* ptr_first, void* ptr_last, size_t size)
{
return ((ptr_first == nullptr) || (ptr_last == nullptr)) ? false : getAllocator(size).deallocate(ptr_first, ptr_last);
}
void batch_deallocate_link(void* ptr, void* ptr_next)
{
reinterpret_cast<baselib::mpmc_node*>(ptr)->next = reinterpret_cast<baselib::mpmc_node*>(ptr_next);
}
bool reserve(size_t size, size_t capacity)
{
return getAllocator(size).reserve(AllocatorSize(size), capacity, static_cast<BaseAllocator&>(*this));
}
size_t capacity(size_t size)
{
return getAllocator(size).capacity();
}
static constexpr size_t optimal_size(const size_t size)
{
return size == 0 ? 0 : BlockAllocator::optimal_size(AllocatorSize(size));
}
private:
struct CompileTime
{
static constexpr size_t Log2Base(size_t value, size_t offset) { return (value > 1) ? Log2Base(value >> (size_t)1, offset + 1) : offset; }
static constexpr size_t Log2Base(size_t value) { return Log2Base(value, 0); }
static constexpr size_t Max(size_t a, size_t b) { return a > b ? a : b; }
};
static constexpr size_t m_MinSize = CompileTime::Max(min_size, CompileTime::Max(CompileTime::Max(sizeof(void*), linear_subdivisions), alignment));
static constexpr size_t m_MinSizePow2 = baselib::Algorithm::CeilPowerOfTwo(m_MinSize);
static constexpr size_t m_MaxSizePow2 = baselib::Algorithm::CeilPowerOfTwo(CompileTime::Max(max_size, m_MinSize));
static constexpr size_t m_MinSizeMask = static_cast<size_t>(1) << CompileTime::Log2Base(m_MinSizePow2 - 1);
static constexpr size_t m_AllocatorCount = (CompileTime::Log2Base(m_MaxSizePow2) - CompileTime::Log2Base(m_MinSizePow2)) + 1;
static constexpr size_t m_AllocatorBaseOffsetLog2 = CompileTime::Log2Base(m_MinSizePow2) - 1;
static constexpr size_t m_LinearSubdivisionsLog2 = CompileTime::Log2Base(linear_subdivisions);
static constexpr size_t AllocatorSizeLog2(size_t size) { return baselib::Algorithm::HighestBitNonZero(size | m_MinSizeMask); }
static constexpr size_t LinearAllocatorSizeLog2(size_t size, size_t sizeLog2) { return (size & ((size_t)1 << sizeLog2) - 1) >> (sizeLog2 - m_LinearSubdivisionsLog2); }
template<int value = ((m_AllocatorCount == 1 && linear_subdivisions == 1) ? 1 : 2), typename std::enable_if<(value == 1), int>::type = 0>
static constexpr FORCE_INLINE size_t AllocatorSize(size_t size)
{
return m_MinSizePow2;
}
template<int value = ((m_AllocatorCount != 1 && linear_subdivisions == 1) ? 3 : 4), typename std::enable_if<(value == 3), int>::type = 0>
static constexpr FORCE_INLINE size_t AllocatorSize(size_t size)
{
return (size_t)1 << (AllocatorSizeLog2(size - 1) + 1);
}
template<int value = (linear_subdivisions == 1) ? 0 : 1, typename std::enable_if<(value), int>::type = 0>
static FORCE_INLINE size_t AllocatorSize(size_t size)
{
const size_t subDivSize = ((size_t)1 << baselib::Algorithm::HighestBitNonZero(size)) >> m_LinearSubdivisionsLog2;
return (size - 1 & ~(subDivSize - 1)) + subDivSize;
}
template<int value = ((m_AllocatorCount == 1 && linear_subdivisions == 1) ? 1 : 2), typename std::enable_if<(value == 1), int>::type = 0>
BlockAllocator& getAllocator(size_t)
{
return m_Allocators[0][0];
}
template<int value = ((m_AllocatorCount != 1 && linear_subdivisions == 1) ? 3 : 4), typename std::enable_if<(value == 3), int>::type = 0>
BlockAllocator& getAllocator(const size_t size)
{
return m_Allocators[AllocatorSizeLog2(size - 1) - m_AllocatorBaseOffsetLog2][0];
}
template<int value = ((m_AllocatorCount == 1 && linear_subdivisions != 1) ? 5 : 6), typename std::enable_if<(value == 5), int>::type = 0>
BlockAllocator& getAllocator(size_t size)
{
--size;
return m_Allocators[0][LinearAllocatorSizeLog2(size, AllocatorSizeLog2(size))];
}
template<int value = ((m_AllocatorCount != 1 && linear_subdivisions != 1) ? 7 : 8), typename std::enable_if<(value == 7), int>::type = 0>
BlockAllocator& getAllocator(size_t size)
{
--size;
const size_t sizeLog2 = AllocatorSizeLog2(size);
return m_Allocators[sizeLog2 - m_AllocatorBaseOffsetLog2][LinearAllocatorSizeLog2(size, sizeLog2)];
}
template<typename T> struct has_deallocate_all
{
template<typename U, void (U::*)()> struct Check;
template<typename U> static constexpr bool test(Check<U, &U::deallocate_all> *) { return true; }
template<typename U> static constexpr bool test(...) { return false; }
static constexpr bool value = test<T>(nullptr);
};
template<bool value = has_deallocate_all<BaseAllocator>::value, typename std::enable_if<(value), int>::type = 0>
void DeallocateSegmentsImpl()
{
BaseAllocator::deallocate_all();
}
template<bool value = has_deallocate_all<BaseAllocator>::value, typename std::enable_if<(!value), int>::type = 0>
void DeallocateSegmentsImpl()
{
for (auto& pow2Allocators : m_Allocators)
for (auto& blockAllocator : pow2Allocators)
blockAllocator.deallocate_segments(static_cast<BaseAllocator&>(*this));
}
template<bool use_try_allocate>
void* ReallocateImpl(void* ptr, size_t old_size, size_t new_size)
{
if (ptr == nullptr)
return use_try_allocate ? try_allocate(new_size) : allocate(new_size);
BlockAllocator& oldAllocator = getAllocator(old_size);
BlockAllocator& newAllocator = getAllocator(new_size);
if (&oldAllocator == &newAllocator)
return ptr;
void* newPtr = newAllocator.allocate();
if ((!use_try_allocate) && (newPtr == nullptr))
newPtr = allocate(new_size);
if (newPtr)
{
std::memcpy(newPtr, ptr, std::min(new_size, old_size));
oldAllocator.deallocate(ptr);
}
return newPtr;
}
BlockAllocator m_Allocators[m_AllocatorCount][linear_subdivisions];
};
}
}
}
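
The size-class selection maps a request to a two-level index: AllocatorSizeLog2 picks the power-of-two bucket from the highest set bit of size - 1 (clamped from below via m_MinSizeMask), and LinearAllocatorSizeLog2 slices that bucket into linear_subdivisions equal ranges using the bits just below it. A worked example of that indexing for the illustrative parameters min_size = 16, max_size = 64, linear_subdivisions = 4 (the constants below were derived by hand from the compile-time formulas above):

#include <cstddef>
#include <cassert>

constexpr std::size_t kMinSizeMask    = 8;   // 1 << Log2Base(m_MinSizePow2 - 1) for m_MinSizePow2 == 16
constexpr std::size_t kBaseOffsetLog2 = 3;   // Log2Base(m_MinSizePow2) - 1
constexpr std::size_t kSubdivLog2     = 2;   // Log2Base(linear_subdivisions) for 4 subdivisions

std::size_t HighestBitNonZero(std::size_t v)
{
    std::size_t bit = 0;
    while (v >>= 1) ++bit;
    return bit;
}

// First-level bucket: which power-of-two range, clamped from below by the min size.
std::size_t FirstLevel(std::size_t size)
{
    return HighestBitNonZero((size - 1) | kMinSizeMask) - kBaseOffsetLog2;
}

// Second-level bucket: which of the 4 equal slices inside that range.
std::size_t SecondLevel(std::size_t size)
{
    const std::size_t sizeLog2 = HighestBitNonZero((size - 1) | kMinSizeMask);
    return ((size - 1) & ((std::size_t(1) << sizeLog2) - 1)) >> (sizeLog2 - kSubdivLog2);
}

int main()
{
    assert(FirstLevel(16) == 0 && SecondLevel(16) == 3);   // smallest bucket, top slice
    assert(FirstLevel(17) == 1 && SecondLevel(17) == 0);   // first slice of the (16, 32] bucket
    assert(FirstLevel(24) == 1 && SecondLevel(24) == 1);   // second slice (21..24) of that bucket
    assert(FirstLevel(64) == 2 && SecondLevel(64) == 3);   // largest bucket, top slice
}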