From 4e4c266632d64437e17c2c7f815734b952a2a387 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Mon, 21 Oct 2024 14:21:34 +0200 Subject: [PATCH] RAW (can-fail) flags for optimization and non-failing implementation added --- main.cpp | 2 +- simap.h | 62 ++++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/main.cpp b/main.cpp index ae68fe1..8cce812 100644 --- a/main.cpp +++ b/main.cpp @@ -135,7 +135,7 @@ int main() { test_basics(unomap, &umi); /* Performance tests */ - int i = 100; + int i = 10000; keystore(i, true); datastore(i, true); diff --git a/simap.h b/simap.h index c6b6a99..6d82168 100644 --- a/simap.h +++ b/simap.h @@ -7,8 +7,18 @@ #include "amap.h" #include "arena.h/arena.h" +/* Possible optimizations, but they mean there can be lookup / insert errors (very rarely) +*/ +#define SIMAP_AVX2_RAW +#define SIMAP_RAW + /* Perf trickery */ +/* This unifies the ifdefs but separates code paths when needed */ +#ifdef SIMAP_AVX2_RAW +#define SIMAP_RAW +#endif + /* I have no idea what MSVC has instead... */ #ifdef _MSC_VER #define SM_THREAD_LOCAL __declspec(thread) @@ -41,6 +51,17 @@ union simap_ptr64 { }; typedef union simap_ptr64 simap_ptr64; +struct elem_nonkey_prefix { + /** The value (ptr) */ + simap_ptr64 value; + + /** Previous element index from base (full offset) */ + uint32_t previndex; + /** Next element index from base (full offset) */ + uint32_t nextindex; +}; +typedef struct elem_nonkey_prefix elem_nonkey_prefix; + /** * The per-element storage layout * @@ -61,15 +82,10 @@ typedef union simap_ptr64 simap_ptr64; * and if you want check extra validity by jumping back&forth in it. 
*/ struct elem_prefix { - /** The value (ptr) */ - simap_ptr64 value; + /** Value and meta-data - size is divisible by 8 bytes */ + elem_nonkey_prefix nonkey_prefix; - /** Previous element index from base (full offset) */ - uint32_t previndex; - /** Next element index from base (full offset) */ - uint32_t nextindex; - - /** The prefix of the key */ + /** The prefix of the key - an inlined string, padded to a length divisible by 8 bytes, follows this */ simap_c64 key_prefix; }; typedef struct elem_prefix elem_prefix; @@ -107,10 +123,24 @@ static inline simap_instance simap_create() { static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr); // TODO: We can possibly hand-optimise this with intrinsics maybe - but I hope autovectorization (does not seem to happen???) -static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *tip, auint64 prefix, auint64 *end) { - #pragma GCC unroll 4 +static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *base, auint64 *tip, auint64 prefix, auint64 *end) { +#ifdef SIMAP_AVX2_RAW + /* TODO: Implement */ +#endif +#ifdef SIMAP_RAW + #pragma GCC unroll 16 while((++tip < end) && (*tip != prefix)); return tip; +#endif + /* XXX: This only works because of (***): the (uint32_t) -1 nextindex written there makes tip >= end for sure, both here and back in the (***) loop of the caller */ + elem_nonkey_prefix *pre = (elem_nonkey_prefix *)((uint8_t *)tip - sizeof(elem_nonkey_prefix)); + tip = (auint64 *) ((uint8_t *)base + pre->nextindex + sizeof(elem_nonkey_prefix)); + #pragma GCC unroll 16 + while((tip < end) && (*tip != prefix)) { + pre = (elem_nonkey_prefix *)((uint8_t *)tip - sizeof(elem_nonkey_prefix)); + tip = (auint64 *) ((uint8_t *)base + pre->nextindex + sizeof(elem_nonkey_prefix)); + } + return tip; } static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) { @@ -128,8 +158,8 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!!
*/ auint64 *base = (auint64 *) (map->base); auint64 *end = (auint64 *)((uint8_t *)base + (map->usage_end)); - auint64 *tipp = make_tipp(base, prefix.u64, end); - while(tipp < end) { + auint64 *tipp = make_tipp(base, base, prefix.u64, end); + while(tipp < end) { // XXX: (***) /* Need detailed lookup, because found the prefix */ assert((*tipp == prefix.u64)); @@ -138,13 +168,14 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char if(!is_smallkey) { char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); if(strcmp(keyremains, tippremains) != 0) { - tipp = make_tipp(tipp, prefix.u64, end); + tipp = make_tipp(base, tipp, prefix.u64, end); continue; } } simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2)); +#ifdef SIMAP_RAW /* Check back & forth (jump validation) */ uint32_t previ = *((uint32_t *)(tipp - 1)); if(previ == (uint32_t) -1) { @@ -160,9 +191,10 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char + sizeof(uint32_t)); if(retipp != tipp) { - tipp = make_tipp(tipp, prefix.u64, end); + tipp = make_tipp(base, tipp, prefix.u64, end); continue; } +#endif /* SIMAP_RAW */ /* Can have the (statistically checked) pointer */ return ptr; @@ -229,7 +261,7 @@ static inline void *simap_force_add(simap_instance *map, const char *key, void * uint32_t *usprev = (uint32_t *)((uint8_t *)(map->base) + usi + sizeof(simap_ptr64)); *usprev = previ; - *(usprev + 1) = (uint32_t) -1; + *(usprev + 1) = (uint32_t) -1; /* XXX: (***): ensures the "not < end" here! */ /* 8byte: First 8 char */ simap_c64 *start_str = (simap_c64 *)(usprev + 2);