diff --git a/main.cpp b/main.cpp
index ae68fe1..9a52f95 100644
--- a/main.cpp
+++ b/main.cpp
@@ -7,6 +7,7 @@
 #include "simap.h"
 #include "mapmap.hpp"
 #include "unomap.hpp"
+#include "simd_map.h"
 
 /**
  * Creates keys or returns the ith key. Used for performance tests.
@@ -123,7 +124,7 @@ void test_basics(amap mapdo, void *map) {
 	assert(*iptr == 3);
 }
 
-int main() {
+void test_stringmaps() {
 	/* Basic tests */
 	simap_instance si = simap_create();
 	test_basics(simap, &si);
@@ -139,9 +140,61 @@ int main() {
 	keystore(i, true);
 	datastore(i, true);
 
+	puts("Performance testing stringmaps:");
+	puts("");
 	test_perf(mapmap, &mi, i, "std::map");
 	test_perf(simap, &si, i, "simap");
 	test_perf(unomap, &umi, i, "std::unordered_map");
+}
+
+void test_simd_map_basics() {
+	/* Empty free tests */
+	simd_map smap = simd_map_create();
+	simd_map_free(&smap);
+
+	/* Re-create */
+	smap = simd_map_create();
+
+	/* Empty search */
+	assert(simd_map_find(&smap, 42) == NULL);
+
+	/* Insertions */
+	assert(simd_map_set(&smap, 40, 0) != 0);
+	assert(simd_map_set(&smap, 41, 1) != 0);
+	assert(simd_map_set(&smap, 42, 2) != 0);
+
+	/* Searches */
+	assert(*simd_map_find(&smap, 40) == 0);
+	assert(*simd_map_find(&smap, 41) == 1);
+	assert(*simd_map_find(&smap, 42) == 2);
+
+	/* Test erase */
+	simd_map_erase(&smap);
+	assert(simd_map_find(&smap, 42) == NULL);
+	assert(simd_map_set(&smap, 42, 2) != 0);
+	assert(*simd_map_find(&smap, 42) == 2);
+
+	/* Test a bit more */
+	int cnt = 100;
+	for(int i = 0; i < cnt; ++i) {
+		assert(simd_map_set(&smap, i, (cnt - i)) != 0);
+	}
+	for(int i = 0; i < cnt; ++i) {
+		assert(*simd_map_find(&smap, i) == (cnt - i));
+	}
+
+	/* Filled free */
+	simd_map_free(&smap);
+}
+
+void test_intmaps() {
+	/* Basic tests */
+	test_simd_map_basics();
+}
+
+int main() {
+	test_intmaps();
+	test_stringmaps();
 
 	return 0;
 }
diff --git a/simd_map.h b/simd_map.h
new file mode 100644
index 0000000..61e2aae
--- /dev/null
+++ b/simd_map.h
@@ -0,0 +1,192 @@
+#ifndef SIMD_MAP
+#define SIMD_MAP
+
+#include <stddef.h> /* NULL, ptrdiff_t */
+#include <stdint.h> /* uint32_t, ... */
+#include <assert.h> /* assert */
+#include "arena.h/arena.h"
+
+/* SIMD support */
+#ifdef __AVX2__
+#include <immintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+/* Compiler abstraction - the MSVC hint macros are pass-throughs so that wrapped conditions still compile */
+#ifdef _MSC_VER
+#define SM_THREAD_LOCAL __declspec(thread)
+#define SM_PREFETCH(x) ((void)0)
+#define SM_LIKELY(x) (x)
+#define SM_UNLIKELY(x) (x)
+#define SM_NOINLINE __declspec(noinline)
+#define SM_ALWAYS_INLINE __forceinline
+#else
+#define SM_THREAD_LOCAL __thread
+#define SM_PREFETCH(x) __builtin_prefetch(x)
+#define SM_LIKELY(x) __builtin_expect((x),1)
+#define SM_UNLIKELY(x) __builtin_expect((x),0)
+#define SM_NOINLINE __attribute__ ((noinline))
+#define SM_ALWAYS_INLINE __attribute__ ((always_inline))
+#endif
+
+/* 32 byte = 256 bits = (8 * 32bit) optimized for AVX2 */
+#define SM_LANE_SPAN 8
+
+/** Keys and values are grouped together per lane to support SIMD better (a lane also became a single cache line this way) */
+struct simd_map_lane {
+	uint32_t keys[SM_LANE_SPAN];
+	uint32_t values[SM_LANE_SPAN];
+};
+typedef struct simd_map_lane simd_map_lane;
+
+struct simd_map {
+	arena a;
+	simd_map_lane *lanes; /* NULL when the initial allocation failed */
+	uint32_t end; /* allocated size, in lanes */
+	uint32_t usage_end; /* used size, in lanes (not in elements)! */
+	int lane_modulo; /* [0..SM_LANE_SPAN): next free slot of the last lane; 0 means the last lane is full (or there is none) */
+};
+typedef struct simd_map simd_map;
+
+/**
+ * Create a simd map instance.
+ *
+ * When the initial allocation fails `lanes` is left NULL and simd_map_force_insert / simd_map_set report an error.
+ */
+static inline SM_ALWAYS_INLINE simd_map simd_map_create(void) {
+	simd_map ret;
+	ret.a = newarena((ptrdiff_t)1 << 33);
+	ret.end = 0;
+	ret.usage_end = 0;
+	ret.lane_modulo = 0;
+
+	/* Allocate one word aligned to the lane size; the first really addressable lane starts right after it. */
+	/* NOTE(review): if auint64 is 8 bytes wide, base + 1 lies 8 bytes past the aligned address - confirm the lane alignment against arena.h */
+	auint64 *base = (auint64*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(simd_map_lane), 1);
+	ret.lanes = base ? (simd_map_lane*)(base + 1) : NULL;
+	return ret;
+}
+
+/** Free all resources held by the map. Returns 0 on errors. */
+static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) {
+	return freearena(&(map->a));
+}
+
+/**
+ * Looks up the key in the given map LANE.
+ *
+ * @param map_lane The lane to find in.
+ * @param key The key to search for.
+ * @param lane_modulo When non-zero, only the first lane_modulo elements of the lane are searched. Zero means all elements.
+ * @returns NULL when not found, otherwise pointer to the stored value for the key.
+ */
+static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(simd_map_lane *map_lane, uint32_t key, int lane_modulo) {
+	uint32_t *keys = map_lane->keys;
+	uint32_t *values = map_lane->values;
+
+	/* Partially filled lane: slots from lane_modulo on are not in use, so they must never match */
+	if(SM_UNLIKELY(lane_modulo != 0)) {
+		for(int i = 0; i < lane_modulo; ++i) {
+			if(SM_UNLIKELY(keys[i] == key)) {
+				return &values[i];
+			}
+		}
+		return NULL;
+	}
+
+	/* Full lane. TODO: AVX2 / SSE2 variants - the regular integer code should have good ILP and cache locality anyway */
+	for(int i = 0; i < SM_LANE_SPAN; ++i) {
+		if(SM_UNLIKELY(keys[i] == key)) {
+			return &values[i];
+		}
+	}
+	return NULL;
+}
+
+/** Returns a pointer to the value stored for the key, or NULL if the key is not in the map. */
+static inline uint32_t *simd_map_find(simd_map *map, uint32_t key) {
+	if(map->usage_end == 0) return NULL; /* Empty map */
+
+	/* Every lane before the last one is full */
+	uint32_t last = map->usage_end - 1;
+	for(uint32_t i = 0; i < last; ++i) {
+		uint32_t *found = simd_map_lane_find(&(map->lanes[i]), key, 0);
+		if(found) return found;
+	}
+
+	/* The last lane may be incomplete - lane_modulo tells how far it is filled */
+	return simd_map_lane_find(&(map->lanes[last]), key, map->lane_modulo);
+}
+
+/** Useful if you know the key has never been added before (faster)! Returns 0 on errors, otherwise 1. */
+static inline char simd_map_force_insert(simd_map *map, uint32_t key, uint32_t value) {
+	/* The initial allocation in simd_map_create failed - there is nothing to insert into */
+	if(SM_UNLIKELY(!map->lanes)) {
+		return 0;
+	}
+
+	/* A new lane is needed only when the last one is full (or there is no lane in use yet) */
+	uint32_t storage_needed = (map->lane_modulo == 0) ? 1 : 0;
+	if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) {
+		void *allret = aralloc(&(map->a),
+				sizeof(simd_map_lane) /* esize */,
+				1 /* align - should be sizeof(simd_map_lane) but is expected to be aligned here as-is already */,
+				storage_needed /* ecount */);
+
+		/* Return early with error but no state changes if we cannot allocate! */
+		if(SM_UNLIKELY(!allret)) {
+			return 0;
+		}
+
+		/* Administer end offset */
+		map->end += storage_needed;
+	}
+
+	/* Administer usage end offset, separate from end because erase / shrink ops are possible */
+	map->usage_end += storage_needed;
+
+	/* Always force-insert into the last lane at lane_modulo location */
+	simd_map_lane *lane = &(map->lanes[map->usage_end - 1]);
+	lane->keys[map->lane_modulo] = key;
+	lane->values[map->lane_modulo] = value;
+
+	/* Update lane modulo */
+	map->lane_modulo = (map->lane_modulo + 1) % SM_LANE_SPAN;
+
+	return 1;
+}
+
+/** Returns 0 on errors, otherwise 1 when added as new, 2 when an already existing mapping got overwritten */
+static inline char simd_map_set(simd_map *map, uint32_t key, uint32_t value) {
+	uint32_t *found = simd_map_find(map, key);
+	if(!found) {
+		return simd_map_force_insert(map, key, value);
+	}
+
+	/* Overwrite already existing mapping */
+	*found = value;
+	return 2;
+}
+
+/** Empties the map - this does not free resources, just makes it reusable! */
+static inline void simd_map_erase(simd_map *map) {
+	map->usage_end = 0;
+	map->lane_modulo = 0;
+}
+
+/**
+ * Remove the given key from the map so it is not stored anymore. Returns 1 when found and removed, 0 otherwise.
+ *
+ * TODO: Not implemented yet - implement by swapping to end + shrink!
+ */
+static inline int simd_map_remove(simd_map *map, uint32_t key) {
+	(void)map;
+	(void)key;
+	assert(0 && "simd_map_remove is not implemented");
+	return 0; /* Keeps the function well-defined when asserts are compiled out (NDEBUG) */
+}
+
+#endif /* SIMD_MAP */