From 6c1adb16554385aac8541530da37e788ac3266b9 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Fri, 11 Oct 2024 00:54:13 +0200 Subject: [PATCH] perf tests and smaller perf tunes + some experiments --- main.cpp | 17 ++++++++++++++--- makefile | 2 ++ simap.h | 39 ++++++++++++++++++++++++++++++--------- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/main.cpp b/main.cpp index a25397e..d0fad3b 100644 --- a/main.cpp +++ b/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include <chrono> #include "amap.h" #include "simap.h" #include "mapmap.hpp" @@ -51,7 +52,17 @@ inline int *datastore(int i, bool create = false) noexcept { } } -void measure_speed(amap mapdo, void *map, int max_key) { +void test_perf(amap mapdo, void *map, int max_key, const char *what) { /* Times max_key AMAP_SET calls on `map` through `mapdo` and prints the total in ms, labelled `what`. */ + auto begin = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < max_key; ++i) { + const char *key = keystore(i); + int *data = datastore(i); + mapdo(map, AMAP_SET, key, data); + } + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin); /* nanoseconds; scaled by 1e-6 to ms below */ + + printf("Insertion time for %d elements (%s): %.3f ms.\n", max_key, what, elapsed.count() * 1e-6); } void test_basics(amap mapdo, void *map) { @@ -108,8 +119,8 @@ int main() { keystore(i, true); datastore(i, true); - measure_speed(simap, &si, i); - measure_speed(mapmap, &mi, i); + test_perf(simap, &si, i, "simap"); + test_perf(mapmap, &mi, i, "std::map"); return 0; } diff --git a/makefile b/makefile index 5f2414c..9447bce 100644 --- a/makefile +++ b/makefile @@ -2,3 +2,5 @@ debug: g++ main.cpp -g -Wall -o main release: g++ main.cpp -O2 -Wall -o main +release-native: + g++ main.cpp -march=native -O3 -Wall -o main diff --git a/simap.h b/simap.h index ea20b19..ec247c5 100644 --- a/simap.h +++ b/simap.h @@ -7,6 +7,25 @@ #include "amap.h" #include "arena.h/arena.h" +/* Perf trickery */ + +/* I have no idea what MSVC has instead... 
*/ +#ifdef _MSC_VER +#define SM_THREAD_LOCAL __declspec(thread) +#define SM_PREFETCH(x) +#define SM_LIKELY(x) +#define SM_UNLIKELY(x) +#define SM_NOINLINE __declspec(noinline) +#define SM_ALWAYS_INLINE __forceinline +#else +#define SM_THREAD_LOCAL __thread +#define SM_PREFETCH(x) __builtin_prefetch(x) +#define SM_LIKELY(x) __builtin_expect((x),1) +#define SM_UNLIKELY(x) __builtin_expect((x),0) +#define SM_NOINLINE __attribute__ ((noinline)) +#define SM_ALWAYS_INLINE __attribute__ ((always_inline)) +#endif + /** * A "peasantly" map data structure backed by arena.h - basically a toy data structure... * @@ -85,13 +104,14 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */ const char *keyremains = key + prefixlen; + /* TODO: Maybe this is buggy when we access behind our own data? */ /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */ /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */ uint64_t *base = map->base; uint64_t *tipp = map->base; for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) { /* Fast lookup */ - if(*tipp == prefix.u64) { + if((*tipp == prefix.u64)) { /* First check the remains of the string (only if needed) */ if(!is_smallkey) { char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); @@ -153,11 +173,11 @@ static inline uint32_t simap_elem_storage_size(const char *key) { padding; } -/** Force-add the (key,value) to the end of the map */ -static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) { +/** Force-add the (key,value) to the end of the map. 
Use this if you prefill your map one-by-one and need speed */ +static inline void *simap_force_add(simap_instance *map, const char *key, void *ptr) { uint32_t storage_needed = simap_elem_storage_size(key); assert((storage_needed % 8) == 0); - if(map->end - map->usage_end < storage_needed) { + if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) { /* Need storage */ aralloc(&(map->a), sizeof(uint8_t)/*esize*/, @@ -201,7 +221,7 @@ static inline void *simap_force_add_internal(simap_instance *map, const char *ke /* XXX: The "padding" gets automagically added by the movement of the arena here(by junk bytes)! */ /* Update previous with linkage */ - if(previ != (uint32_t)-1) { + if(SM_LIKELY(previ != (uint32_t)-1)) { uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ + sizeof(simap_ptr64) + sizeof(uint32_t)); @@ -235,7 +255,7 @@ static inline void *simap_force_add_internal(simap_instance *map, const char *ke static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) { simap_instance *map = (simap_instance *) amap_instance; - if(op == AMAP_ERASE) { + if((op == AMAP_ERASE)) { map->prev_usage_end = (uint32_t) -1; map->usage_end = 0; return (void *)((uint8_t)(NULL) - 1L); @@ -249,12 +269,13 @@ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void } else { assert(op == AMAP_SET); - if(found) { + if((!found)) { + /* Add as new */ + return simap_force_add(map, key, ptr); + } else { /* Just overwrite */ found->ptr = ptr; return (void *) found; - } else { - return simap_force_add_internal(map, key, ptr); } }