Tried to get auto-vectorization with simpler code, but it still does not happen; the compiler reports: "missed: not vectorized: no vectype for stmt, scalar_type: auint64"

This commit is contained in:
Richard Thier 2024-10-11 02:13:51 +02:00
parent 6c1adb1655
commit 1c41a4e106
3 changed files with 48 additions and 35 deletions

View File

@ -115,12 +115,12 @@ int main() {
test_basics(mapmap, &mi);
/* Performance tests */
int i = 1000;
int i = 10000;
keystore(i, true);
datastore(i, true);
test_perf(simap, &si, i, "simap");
test_perf(mapmap, &mi, i, "std::map");
test_perf(simap, &si, i, "simap");
return 0;
}

View File

@ -3,4 +3,8 @@ debug:
release:
g++ main.cpp -O2 -Wall -o main
release-native:
g++ main.cpp -march=native -O3 -Wall -o main
g++ main.cpp -fopt-info-vec-missed -march=native -O3 -Wall -o main
release-avx2:
g++ main.cpp -fopt-info-vec-missed -mavx2 -O3 -Wall -o main
release-avx2-asm:
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main

73
simap.h
View File

@ -25,6 +25,7 @@
#define SM_NOINLINE __attribute__ ((noinline))
#define SM_ALWAYS_INLINE __attribute__ ((always_inline))
#endif
typedef uint64_t auint64 __attribute__ ((__aligned__(8)));
/**
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
@ -61,7 +62,7 @@ struct simap_instance {
uint32_t prev_usage_end; /* previous usage_end or -1 if no previous exists... in bytes!!! */
uint32_t usage_end; /* in bytes!!! */
/** see doc comment for layout and why uint64_t* is the type */
uint64_t *base;
auint64 *base;
};
typedef struct simap_instance simap_instance;
@ -71,7 +72,7 @@ static inline simap_instance simap_create() {
ret.end = 0;
ret.prev_usage_end = (uint32_t) -1;
ret.usage_end = 0;
ret.base = ((uint64_t*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */
ret.base = ((auint64*) aralloc(&(ret.a), sizeof(auint64), sizeof(auint64), 1)) /* addr divisible by 8 */
+ 1; /* First really addressible thing */
return ret;
}
@ -81,17 +82,23 @@ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void
/**
 * The first 8 characters are stored as uint64_t for fast checks.
 * str8 and u64 are union members, so they share the same 8 bytes of storage.
 */
union simap_c64 {
char str8[8];
uint64_t u64;
auint64 u64;
};
/* NOTE(review): this typedef names `union simap_char64`, while the union defined above is tagged `simap_c64` - confirm which name is intended */
typedef union simap_char64 simap_char64;
/** This is to ensure 8byte storage of pointers (with possible padding) */
union simap_ptr64 {
void *ptr;
uint64_t u64;
auint64 u64;
};
typedef union simap_ptr64 simap_ptr64;
/**
 * Scan forward from `tip` for the first element equal to `prefix`.
 *
 * @param tip    position where the scan starts
 * @param prefix 8-byte value to look for
 * @param end    scan limit; elements at or beyond this address are never read
 * @return pointer to the first element in [tip, end) that equals `prefix`;
 *         otherwise the position where the scan stopped (`end` after a full
 *         scan, or `tip` unchanged when `tip` was not below `end`)
 */
/* TODO: We can possibly hand-optimise this with intrinsics - but the hope is that autovectorization handles it */
static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *tip, auint64 prefix, auint64 *end) {
	for(; tip < end; ++tip) {
		if(*tip == prefix) {
			/* Found the prefix: stop here so the caller gets its address */
			break;
		}
	}
	return tip;
}
static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
/* Construct prefix (fast-key) */
size_t keylen = strlen(key);
@ -107,43 +114,45 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char
/* TODO: Maybe this is buggy when we access behind our own data? */
/* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */
/* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
uint64_t *base = map->base;
uint64_t *tipp = map->base;
for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) {
/* Fast lookup */
if((*tipp == prefix.u64)) {
/* First check the remains of the string (only if needed) */
if(!is_smallkey) {
char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
if(strcmp(keyremains, tippremains) != 0) {
continue;
}
auint64 *base = map->base;
auint64 *end = (auint64 *)((uint8_t *)base + (map->usage_end));
auint64 *tipp = make_tipp(map->base, prefix.u64, end);
while(tipp < end) {
/* Fast lookup, because found prefix */
assert((*tipp == prefix.u64));
/* First check the remains of the string (only if needed) */
if(!is_smallkey) {
char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
if(strcmp(keyremains, tippremains) != 0) {
continue;
}
}
simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2));
simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2));
/* Check back & forth (jump validation) */
uint32_t previ = *((uint32_t *)(tipp - 1));
if(previ == (uint32_t) -1) {
/* Expect it be good if it was first insert ever? Statistically rare to be not like it */
return ptr;
}
uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ
+ sizeof(simap_ptr64)
+ sizeof(uint32_t));
/* Check back & forth (jump validation) */
uint32_t previ = *((uint32_t *)(tipp - 1));
if(previ == (uint32_t) -1) {
/* Expect it be good if it was first insert ever? Statistically rare to be not like it */
return ptr;
}
uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ
+ sizeof(simap_ptr64)
+ sizeof(uint32_t));
uint64_t *retipp = (uint64_t *)(((uint8_t *)base + prevnexi)
auint64 *retipp = (auint64 *)(((uint8_t *)base + prevnexi)
+ sizeof(simap_ptr64) + sizeof(uint32_t) +
+ sizeof(uint32_t));
if(retipp != tipp) {
continue;
}
/* Can have the (statistically checked) pointer */
return ptr;
if(retipp != tipp) {
continue;
}
/* Can have the (statistically checked) pointer */
return ptr;
}
tipp = make_tipp(map->base, prefix.u64, end);
/* Haven't found anything */
return NULL;