Compare commits

...

5 Commits

Author         SHA1        Message                                                                    Date
Richard Thier  c88bd10b73  add missing arena.h                                                        2024-09-07 21:25:03 +02:00
Richard Thier  119c3d1af7  fix for VirtualMemList                                                     2024-09-03 20:16:32 +02:00
Richard Thier  5048b88106  added virtualmemlist - also tested using vec_test repo and its very fast  2024-09-03 17:16:00 +02:00
Richard Thier  0587129fc6  more vector compatibility                                                  2024-09-03 16:00:58 +02:00
Richard Thier  04964cd2a0  fixed bad typing (int* instead of T*) and faster branchless codes         2024-09-03 03:35:19 +02:00
5 changed files with 247 additions and 30 deletions

View File

@@ -8,6 +8,9 @@
#ifndef TL_NOINLINE
#define TL_NOINLINE __attribute__((noinline))
#endif /* TL_NOINLINE */
#ifndef TL_INLINE
#define TL_INLINE __attribute__((always_inline))
#endif /* TL_INLINE */
#ifndef TL_LIKELY
#define TL_LIKELY(x) __builtin_expect(!!(x), 1)
@@ -16,6 +19,10 @@
#define TL_UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif /* TL_UNLIKELY */
#ifndef TL_GROWTH_RATE
#define TL_GROWTH_RATE 2
#endif /* TL_GROWTH_RATE */
typedef void*(MallocLike)(size_t);
typedef void(FreeLike)(void*);
@@ -29,39 +36,63 @@ class TurboList {
uint32_t capacity;
TL_NOINLINE T& grow_and_insert(T elem) noexcept {
TL_NOINLINE void grow_and_insert(T elem) noexcept {
// assert(mid == 0);
if(old) FREE(old);
old = nex;
mid = end;
capacity *= 2;
nex = (int *) MALLOC(this->capacity * sizeof(T));
capacity *= TL_GROWTH_RATE;
nex = (T *) MALLOC(this->capacity * sizeof(T));
// Will go into the INSERT code path here
return insert(elem);
insert(elem);
}
template<typename... Args>
TL_NOINLINE T& grow_and_emplace(Args&&... args) noexcept {
// assert(mid == 0);
if(old) FREE(old);
old = nex;
mid = end;
capacity *= TL_GROWTH_RATE;
nex = (T *) MALLOC(this->capacity * sizeof(T));
// Will go into the INSERT code path here
return emplace_back(std::forward<Args>(args)...);
}
public:
inline TurboList(uint32_t initial_size = 0, uint32_t initial_cap = 16) noexcept :
TL_INLINE TurboList(uint32_t initial_size = 0, uint32_t initial_cap = 16) noexcept :
old(nullptr),
mid(0),
end(initial_size),
capacity(initial_cap) {
nex = (int *) MALLOC(this->capacity * sizeof(T));
nex = (T *) MALLOC(this->capacity * sizeof(T));
}
inline ~TurboList() noexcept {
TL_INLINE ~TurboList() noexcept {
if(nex) FREE(nex);
if(old) FREE(old);
}
inline T& operator[](uint32_t i) noexcept {
return (i < mid) ? old[i] : nex[i];
TL_INLINE T& operator[](uint32_t i) const noexcept {
// This seems to compile to branchless cmov
// conditional code more often this way..
//
T *base = (i < mid) ? old : nex;
return base[i];
//
// if(i < mid) return old[i];
// else return nex[i];
//
// T* loc = (T*) ((i < mid) * (size_t)old +
// (i>=mid) * (size_t)nex +
// i* sizeof(T));
// return *loc;
}
/** This is much faster than operator[] if you do small amounts of work per access */
inline void iterate(void(callback)(T&)) noexcept {
TL_INLINE void iterate(void(callback)(T&)) noexcept {
// old
for(uint32_t i = 0; i < mid; ++i) {
callback(old[i]);
@@ -72,35 +103,54 @@ public:
}
}
inline T& insert(T elem) noexcept {
/** Vector compatibility: Use insert() if you want the inserted thing as reference too */
TL_INLINE void push_back(T elem) noexcept {
this->insert(elem);
}
/** Vector compatibility: Use pop() if you want the popped thing out as copy too */
TL_INLINE void pop_back() noexcept {
if(end > 0) {
--end;
if(end < mid) { // end > 0 here!
end = mid;
mid = 0;
FREE(nex);
nex = old;
old = nullptr;
}
}
}
TL_INLINE void insert(T elem) noexcept {
if(TL_LIKELY(end < capacity)) {
// INSERT
/* Same as this:
/* Same as this - but in this case it measures as faster:
if(mid > 0) {
nex[mid - 1] = old[mid - 1];
--mid;
nex[mid] = old[mid];
}
*/
bool hasmid = (mid > 0);
mid -= hasmid;
nex[mid] = hasmid ? old[mid] : nex[mid];
return (nex[end++] = elem);
nex[end++] = elem;
} else {
// GROW
return grow_and_insert(elem);
grow_and_insert(elem);
}
}
template<typename... Args>
inline T& emplace(Args&&... args) {
TL_INLINE T& emplace_back(Args&&... args) {
if(TL_LIKELY(end < capacity)) {
// INSERT
/* Same as this:
/* Same as this - but in this case it measures as faster:
if(mid > 0) {
nex[mid - 1] = old[mid - 1];
--mid;
@@ -114,21 +164,13 @@ public:
return *new (nex + end++) T(std::forward<Args>(args)...);
} else {
// GROW
//
// Rem.: I just chose this to be less optimized than
// it is possible by making a copy and reusing
// the existing grow and insert code instead of
// writing a new "grow_and_emplace" again.
//
// This happens rarely so its probably fine and
// makes less template instantiations, smaller binary.
return grow_and_insert(T(std::forward<Args>(args)...));
return grow_and_emplace(std::forward<Args>(args)...);
}
}
// TODO: add a finalize() call that memcpys the remaining elements of old into nex, then frees old, sets it to nullptr and resets mid = 0;
inline uint32_t size() noexcept {
TL_INLINE uint32_t size() noexcept {
return end;
}
};
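
A minimal usage sketch of the TurboList interface shown above (not part of the diff). It assumes the class is instantiated as TurboList<int>; the template parameter list itself lies outside the copied hunks, so the exact declaration is an assumption here.

#include <cstdio>
#include "TurboList.hpp"

static void print_elem(int& x) { std::printf("%d ", x); }

int main() {
    TurboList<int> list;                     // empty, default capacity 16
    for (int i = 0; i < 100; ++i)
        list.push_back(i);                   // grows via grow_and_insert() once full
    int& last = list.emplace_back(100);      // emplace_back() hands back a reference
    list[0] = 42;                            // operator[] selects old[] or nex[] without a branch
    list.iterate(print_elem);                // callback per element, cheaper than operator[] per access
    std::printf("\nsize=%u last=%d\n", (unsigned) list.size(), last);
}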

VirtualMemList.hpp  Normal file  (+66 lines)
View File

@@ -0,0 +1,66 @@
#ifndef VIRTUAL_MEM_LIST_H
#define VIRTUAL_MEM_LIST_H
#include "arena.h"
#ifndef VML_NOINLINE
#define VML_NOINLINE __attribute__((noinline))
#endif /* VML_NOINLINE */
#ifndef VML_INLINE
#define VML_INLINE __attribute__((always_inline))
#endif /* VML_INLINE */
#ifndef VML_LIKELY
#define VML_LIKELY(x) __builtin_expect(!!(x), 1)
#endif /* VML_LIKELY */
#ifndef VML_UNLIKELY
#define VML_UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif /* VML_UNLIKELY */
#ifndef VML_GROWTH_RATE
#define VML_GROWTH_RATE 2
#endif /* VML_GROWTH_RATE */
template<typename T>
class VirtualMemList {
arena a;
T *base;
uint32_t end;
public:
VML_INLINE VirtualMemList(uint32_t initial_size = 0) noexcept {
a = newarena((ptrdiff_t)1 << 33);
base = ((T*) alloc(&a, sizeof(T), sizeof(T), 1)) + 1;
end = initial_size;
}
VML_INLINE ~VirtualMemList() noexcept {
// TODO: arena free currently not implemented
}
VML_INLINE T& operator[](uint32_t i) const noexcept {
return base[i];
}
VML_INLINE void push_back(T elem) noexcept {
// Smallest solution:
// base[end++] = elem;
// Non-working, but logically better solution:
// T *value = (T*) alloc(&a, sizeof(T), sizeof(T), 1);
// *value = elem;
// Longer solution (more correct - but I let it waste a bit of memory)
T *value = (T*) alloc(&a, sizeof(T), sizeof(T), 1);
base[end++] = elem;
}
VML_INLINE void pop_back() noexcept {
--end;
}
VML_INLINE uint32_t size() noexcept {
return end;
}
};
#endif // VIRTUAL_MEM_LIST_H
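
A similar sketch for VirtualMemList (not part of the diff), again assuming a VirtualMemList<int> instantiation. The point of the structure is that newarena() reserves a large virtual range ((ptrdiff_t)1 << 33 bytes) up front, so base never moves and push_back() never reallocates; pages are only committed as alloc() reaches them.

#include <cstdio>
#include "VirtualMemList.hpp"

int main() {
    VirtualMemList<int> v;
    for (int i = 0; i < 1000; ++i)
        v.push_back(i);          // commits pages lazily, base stays put
    int& tenth = v[10];          // references stay valid: no reallocation ever happens
    v.pop_back();                // just decrements the element count
    std::printf("v[10]=%d size=%u\n", tenth, (unsigned) v.size());
}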

arena.h  Normal file  (+103 lines)
View File

@@ -0,0 +1,103 @@
// Gradual-commit arena demonstration
// This is free and unencumbered software released into the public domain.
/* Usage:
*
* arena a = newarena((ptrdiff_t)1 << 33);
* if (!alloc(&a, size, align, count)) {
* break;
* }
* total += size * count;
*/
#ifndef ARENA_H
#define ARENA_H
#include <stddef.h>
#include <string.h>
static void *os_reserve(ptrdiff_t);
static char os_commit(void *, ptrdiff_t);
#define ARENA_PAGESIZE ((ptrdiff_t)1<<26)
typedef struct {
char *begin;
char *commit;
char *end;
} arena;
static arena newarena(ptrdiff_t cap)
{
arena a = {0};
cap += -cap & (ARENA_PAGESIZE - 1);
a.begin = a.commit = a.end = (char*) os_reserve(cap);
if (a.begin) {
a.end += cap;
}
return a;
}
static void *alloc(arena *a, ptrdiff_t size, ptrdiff_t align, ptrdiff_t count)
{
ptrdiff_t padding = -(size_t)a->begin & (align - 1);
ptrdiff_t committed = a->commit - a->begin;
if (count > (committed-padding)/size) {
ptrdiff_t reserved = a->end - a->begin;
if (count > (reserved-padding)/size) {
return 0;
}
ptrdiff_t needed = size*count + padding - committed;
needed += -needed & (ARENA_PAGESIZE - 1);
if (!os_commit(a->commit, needed)) {
return 0;
}
a->commit += needed;
}
void *ptr = a->begin + padding;
a->begin += padding + size*count;
// Change to this instead if you want zero-inited (but this gets slow with many arenas)
//return memset(ptr, 0, size*count);
return ptr;
}
#ifdef _WIN32
// $ cc -g3 -nostartfiles -o arena.exe arena.c
// $ cl /Z7 arena.c /link /subsystem:console kernel32.lib libvcruntime.lib
#define W32(r) __declspec(dllimport) r __stdcall
W32(void) ExitProcess(int);
W32(void *) VirtualAlloc(void *, ptrdiff_t, int, int);
#define MEM_COMMIT 0x1000
#define MEM_RESERVE 0x2000
#define PAGE_NOACCESS 0x0001
#define PAGE_READWRITE 0x0004
static void *os_reserve(ptrdiff_t cap)
{
return VirtualAlloc(0, cap, MEM_RESERVE, PAGE_NOACCESS);
}
static char os_commit(void *ptr, ptrdiff_t len)
{
return VirtualAlloc(ptr, len, MEM_COMMIT, PAGE_READWRITE);
}
#else // POSIX
// $ cc -g3 -o arena arena.c
#include <sys/mman.h>
static void *os_reserve(ptrdiff_t cap)
{
void *r = mmap(0, cap, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
return r==MAP_FAILED ? 0 : r;
}
static char os_commit(void *ptr, ptrdiff_t len)
{
return !mprotect(ptr, len, PROT_READ|PROT_WRITE);
}
#endif // POSIX
#endif /* ARENA_H */
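
A small sketch of the arena API by itself (not part of the diff). newarena() only reserves address space (PROT_NONE / PAGE_NOACCESS); alloc() commits memory in ARENA_PAGESIZE-aligned chunks on demand and returns 0 once the reservation is exhausted, so the caller only needs to check the returned pointer.

#include <stdio.h>
#include "arena.h"

int main(void) {
    arena a = newarena((ptrdiff_t)1 << 33);     // reserve ~8 GiB, commit nothing yet
    double *xs = (double*) alloc(&a, sizeof(double), sizeof(double), 1024);
    if (!xs) return 1;                          // reservation failed or ran out
    for (int i = 0; i < 1024; ++i)
        xs[i] = i * 0.5;                        // pages were already committed by alloc() above
    printf("%f\n", xs[1023]);
    return 0;                                   // no release path yet (see the TODO in VirtualMemList)
}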

View File

@@ -6,7 +6,10 @@
#include<vector>
#include<stdlib.h>
#include"TurboList.hpp"
#define TLT int
#include "turbolist.h"
#include"arena.h"
// #define PRINT_DBG

View File

@@ -1,6 +1,9 @@
#ifndef TURBO_LIST_H
#define TURBO_LIST_H
#ifndef TL_NO_CSTDLIB
#include<cstdlib>
#endif /* TL_NO_CSTDLIB */
#include<stdint.h>
#include<assert.h>
@@ -56,11 +59,11 @@ static inline turbolist turbolist_create_adv(void* (*malloc_like)(size_t size),
return tl;
}
#ifndef NO_CSTDLIB
#ifndef TL_NO_CSTDLIB
static inline turbolist turbolist_create() {
return turbolist_create_adv(malloc, free, 0, 16);
}
#endif /* NO_CSTDLIB */
#endif /* TL_NO_CSTDLIB */
static inline void turbolist_delete(turbolist *tl) {
if(tl->nex) tl->free(tl->nex);