From 1c41a4e10619a57fe61d84510bd29cf4e2a8ca1e Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Fri, 11 Oct 2024 02:13:51 +0200 Subject: [PATCH] tried auto-vectorization and simpler codes but does not happen as it says: "missed: not vectorized: no vectype for stmt, scalar_type: auint64" --- main.cpp | 4 ++-- makefile | 6 ++++- simap.h | 73 +++++++++++++++++++++++++++++++------------------------- 3 files changed, 48 insertions(+), 35 deletions(-) diff --git a/main.cpp b/main.cpp index d0fad3b..70188d8 100644 --- a/main.cpp +++ b/main.cpp @@ -115,12 +115,12 @@ int main() { test_basics(mapmap, &mi); /* Performance tests */ - int i = 1000; + int i = 10000; keystore(i, true); datastore(i, true); - test_perf(simap, &si, i, "simap"); test_perf(mapmap, &mi, i, "std::map"); + test_perf(simap, &si, i, "simap"); return 0; } diff --git a/makefile b/makefile index 9447bce..89f37a1 100644 --- a/makefile +++ b/makefile @@ -3,4 +3,8 @@ debug: release: g++ main.cpp -O2 -Wall -o main release-native: - g++ main.cpp -march=native -O3 -Wall -o main + g++ main.cpp -fopt-info-vec-missed -march=native -O3 -Wall -o main +release-avx2: + g++ main.cpp -fopt-info-vec-missed -mavx2 -O3 -Wall -o main +release-avx2-asm: + g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main diff --git a/simap.h b/simap.h index ec247c5..178591b 100644 --- a/simap.h +++ b/simap.h @@ -25,6 +25,7 @@ #define SM_NOINLINE __attribute__ ((noinline)) #define SM_ALWAYS_INLINE __attribute__ ((always_inline)) #endif +typedef uint64_t auint64 __attribute__ ((__aligned__(8))); /** * A "peasantly" map data structure backed by arena.h - basically a toy data structure... @@ -61,7 +62,7 @@ struct simap_instance { uint32_t prev_usage_end; /* previous usage_end or -1 if no previous exists... in bytes!!! */ uint32_t usage_end; /* in bytes!!! */ /** see doc comment for layout and why uint64_t* is the type */ - uint64_t *base; + auint64 *base; }; typedef struct simap_instance simap_instance; @@ -71,7 +72,7 @@ static inline simap_instance simap_create() { ret.end = 0; ret.prev_usage_end = (uint32_t) -1; ret.usage_end = 0; - ret.base = ((uint64_t*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */ + ret.base = ((auint64*) aralloc(&(ret.a), sizeof(auint64), sizeof(auint64), 1)) /* addr divisible by 8 */ + 1; /* First really addressible thing */ return ret; } @@ -81,17 +82,23 @@ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void /** The first 8 characters are stored as uint64_t for fast checks */ union simap_c64 { char str8[8]; - uint64_t u64; + auint64 u64; }; typedef union simap_char64 simap_char64; /** This is to ensure 8byte storage of pointers (with possible padding) */ union simap_ptr64 { void *ptr; - uint64_t u64; + auint64 u64; }; typedef union simap_ptr64 simap_ptr64; +// TODO: We can possibly hand-optimise this with intrinsics maybe - but I hope autovectorization +static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *tip, auint64 prefix, auint64 *end) { + while((tip < end) && (*tip != prefix)) ++tip; + return tip; +} + static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) { /* Construct prefix (fast-key) */ size_t keylen = strlen(key); @@ -107,43 +114,45 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char /* TODO: Maybe this is buggy when we access behind our own data? */ /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */ /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */ - uint64_t *base = map->base; - uint64_t *tipp = map->base; - for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) { - /* Fast lookup */ - if((*tipp == prefix.u64)) { - /* First check the remains of the string (only if needed) */ - if(!is_smallkey) { - char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); - if(strcmp(keyremains, tippremains) != 0) { - continue; - } + auint64 *base = map->base; + auint64 *end = (auint64 *)((uint8_t *)base + (map->usage_end)); + auint64 *tipp = make_tipp(map->base, prefix.u64, end); + while(tipp < end) { + /* Fast lookup, because found prefix */ + assert((*tipp == prefix.u64)); + + /* First check the remains of the string (only if needed) */ + if(!is_smallkey) { + char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); + if(strcmp(keyremains, tippremains) != 0) { + continue; } + } - simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2)); + simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2)); - /* Check back & forth (jump validation) */ - uint32_t previ = *((uint32_t *)(tipp - 1)); - if(previ == (uint32_t) -1) { - /* Expect it be good if it was first insert ever? Statistically rare to be not like it */ - return ptr; - } - uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ - + sizeof(simap_ptr64) - + sizeof(uint32_t)); + /* Check back & forth (jump validation) */ + uint32_t previ = *((uint32_t *)(tipp - 1)); + if(previ == (uint32_t) -1) { + /* Expect it be good if it was first insert ever? Statistically rare to be not like it */ + return ptr; + } + uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ + + sizeof(simap_ptr64) + + sizeof(uint32_t)); - uint64_t *retipp = (uint64_t *)(((uint8_t *)base + prevnexi) + auint64 *retipp = (auint64 *)(((uint8_t *)base + prevnexi) + sizeof(simap_ptr64) + sizeof(uint32_t) + + sizeof(uint32_t)); - if(retipp != tipp) { - continue; - } - - /* Can have the (statistically checked) pointer */ - return ptr; + if(retipp != tipp) { + continue; } + + /* Can have the (statistically checked) pointer */ + return ptr; } + tipp = make_tipp(map->base, prefix.u64, end); /* Haven't found anything */ return NULL;