RAW (can-fail) flags for optimization and non-failing implementation added

This commit is contained in:
Richard Thier 2024-10-21 14:21:34 +02:00
parent 418c8d289c
commit 4e4c266632
2 changed files with 48 additions and 16 deletions

View File

@ -135,7 +135,7 @@ int main() {
test_basics(unomap, &umi);
/* Performance tests */
int i = 100;
int i = 10000;
keystore(i, true);
datastore(i, true);

62
simap.h
View File

@ -7,8 +7,18 @@
#include "amap.h"
#include "arena.h/arena.h"
/* Optional optimizations: when enabled, lookups / inserts can (very rarely) give wrong results.
*/
#define SIMAP_AVX2_RAW
#define SIMAP_RAW
/* Perf trickery */
/* This unifies the ifdefs but separates code paths when needed */
#ifdef SIMAP_AVX2_RAW
#define SIMAP_RAW
#endif
/* I have no idea what MSVC has instead... */
#ifdef _MSC_VER
#define SM_THREAD_LOCAL __declspec(thread)
@ -41,6 +51,17 @@ union simap_ptr64 {
};
typedef union simap_ptr64 simap_ptr64;
/**
 * The part of a stored element that sits in front of its key.
 *
 * Holds the user value plus the two links that chain elements together.
 * The links are offsets counted from the map base, not pointers.
 * NOTE(review): the lookup code steps back two 64-bit words from the key
 * prefix to reach `value`, so this is presumably 16 bytes - confirm that
 * simap_ptr64 is 8 bytes wide on every target.
 */
typedef struct elem_nonkey_prefix {
	/** The stored value (pointer-sized payload) */
	simap_ptr64 value;
	/** Offset of the previous element, measured from the map base */
	uint32_t previndex;
	/** Offset of the next element, measured from the map base */
	uint32_t nextindex;
} elem_nonkey_prefix;
/**
* The per-element storage layout
*
@ -61,15 +82,10 @@ typedef union simap_ptr64 simap_ptr64;
* and if you want check extra validity by jumping back&forth in it.
*/
struct elem_prefix {
/** The value (ptr) */
simap_ptr64 value;
/** Value and meta-data - divisible by 8bytes */
elem_nonkey_prefix nonkey_prefix;
/** Previous element index from base (full offset) */
uint32_t previndex;
/** Next element index from base (full offset) */
uint32_t nextindex;
/** The prefix of the key */
/** The prefix of the key; the rest of the key follows inline as a string padded to a multiple of 8 bytes */
simap_c64 key_prefix;
};
typedef struct elem_prefix elem_prefix;
@ -107,10 +123,24 @@ static inline simap_instance simap_create() {
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr);
// TODO: This could be hand-optimised with intrinsics; autovectorization was hoped for, but it does not seem to happen.
static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *tip, auint64 prefix, auint64 *end) {
#pragma GCC unroll 4
/**
 * Advance to the next candidate position whose 64-bit word equals `prefix`.
 *
 * @param base   Start of the map storage; element offsets are relative to it.
 * @param tip    Current position; the search starts strictly after it.
 * @param prefix The 8-byte key prefix to look for.
 * @param end    One-past-the-end of the used storage.
 * @return A position below `end` whose word equals `prefix`, or a position
 *         that is not below `end` when there is no further candidate.
 */
static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *base, auint64 *tip, auint64 prefix, auint64 *end) {
#ifdef SIMAP_AVX2_RAW
	/* TODO: Implement */
#endif
#ifdef SIMAP_RAW
	/*
	 * Raw scan: compares every 64-bit word after `tip`, not only key prefixes,
	 * so a hit may land on non-key data - the caller has to validate the hit.
	 */
	(void) base; /* only needed by the linked walk below */
#pragma GCC unroll 16
	for(++tip; (tip < end) && (*tip != prefix); ++tip) {
		/* empty: the loop header does all the work */
	}
	return tip;
#else
	/*
	 * Linked walk: `tip` is treated as pointing at a key prefix, so the
	 * element's links sit sizeof(elem_nonkey_prefix) bytes in front of it.
	 * Follow nextindex to the key prefix of the next element.
	 *
	 * XXX: (***) Termination depends on the last element storing
	 * (uint32_t) -1 as nextindex: base + that offset is expected to land
	 * at or beyond `end`, which stops both this loop and the caller's loop.
	 *
	 * NOTE(review): the first lookup passes tip == base, so the first read
	 * below happens *before* base - confirm that memory is valid and holds
	 * a sensible nextindex.
	 */
	elem_nonkey_prefix *pre;
#pragma GCC unroll 16
	do {
		pre = (elem_nonkey_prefix *)((uint8_t *)tip - sizeof(elem_nonkey_prefix));
		tip = (auint64 *)((uint8_t *)base + pre->nextindex + sizeof(elem_nonkey_prefix));
	} while((tip < end) && (*tip != prefix));
	return tip;
#endif /* SIMAP_RAW */
}
static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
@ -128,8 +158,8 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char
/* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
auint64 *base = (auint64 *) (map->base);
auint64 *end = (auint64 *)((uint8_t *)base + (map->usage_end));
auint64 *tipp = make_tipp(base, prefix.u64, end);
while(tipp < end) {
auint64 *tipp = make_tipp(base, base, prefix.u64, end);
while(tipp < end) { // XXX: (***)
/* Need detailed lookup, because found the prefix */
assert((*tipp == prefix.u64));
@ -138,13 +168,14 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char
if(!is_smallkey) {
char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
if(strcmp(keyremains, tippremains) != 0) {
tipp = make_tipp(tipp, prefix.u64, end);
tipp = make_tipp(base, tipp, prefix.u64, end);
continue;
}
}
simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2));
#ifdef SIMAP_RAW
/* Check back & forth (jump validation) */
uint32_t previ = *((uint32_t *)(tipp - 1));
if(previ == (uint32_t) -1) {
@ -160,9 +191,10 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char
+ sizeof(uint32_t));
if(retipp != tipp) {
tipp = make_tipp(tipp, prefix.u64, end);
tipp = make_tipp(base, tipp, prefix.u64, end);
continue;
}
#endif /* SIMAP_RAW */
/* Can have the (statistically checked) pointer */
return ptr;
@ -229,7 +261,7 @@ static inline void *simap_force_add(simap_instance *map, const char *key, void *
uint32_t *usprev = (uint32_t *)((uint8_t *)(map->base) + usi +
sizeof(simap_ptr64));
*usprev = previ;
*(usprev + 1) = (uint32_t) -1;
*(usprev + 1) = (uint32_t) -1; /* XXX: (***): ensures the "not < end" here! */
/* 8byte: First 8 char */
simap_c64 *start_str = (simap_c64 *)(usprev + 2);