diff --git a/main.cpp b/main.cpp
index 3f274f3..457ecc6 100644
--- a/main.cpp
+++ b/main.cpp
@@ -8,6 +8,7 @@
 #include "mapmap.hpp"
 #include "unomap.hpp"
 #include "simd_map.h"
+#include "vmap.h"
 
 /**
  * Creates keys or returns the ith key. Used for performance tests.
@@ -309,7 +310,7 @@ void test_intmaps(int perf_test_i) {
 }
 
 int main() {
-	int perf_test_i = 100;
+	int perf_test_i = 1000;
 
 	/* Prepare data stores */
 	keystore(perf_test_i, true);
diff --git a/vmap.h b/vmap.h
new file mode 100644
index 0000000..bb99273
--- /dev/null
+++ b/vmap.h
@@ -0,0 +1,212 @@
+#ifndef VMAP_H
+#define VMAP_H
+/*
+ * A virtual memory misusing flat-ish hashmap optimized with AVX2.
+ *
+ * Structure
+ *
+ * VMEM
+ * STRUCT
+ * PRIVATE
+ * UINTAPI
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include "simd_map_lane.h"
+
+/* VMEM */
+
+#ifdef _WIN32
+/* TODO: Utilize __Thread + SEH to implement lazy windows pageload zeroing */
+#error "vmap.h: the Windows virtual memory backend is not implemented yet"
+#else
+#include <sys/mman.h>
+
+/**
+ * Map `capacity` bytes of private anonymous read-write memory.
+ * Capacity should be multiple of 4096 for full pages.
+ *
+ * @param capacity The number of bytes to map; must be positive
+ * @returns The start of the mapping, or NULL when it could not be created
+ */
+static void *vm_reserve(ptrdiff_t capacity) {
+	if(capacity <= 0) return NULL;
+	void *r = mmap(NULL, (size_t)capacity, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+	return r == MAP_FAILED ? NULL : r;
+}
+
+/**
+ * Unmap memory that was returned by vm_reserve().
+ * Capacity should be multiple of 4096 for full pages and related to the ptr to free.
+ *
+ * @param ptr The start of the mapping
+ * @param capacity The number of bytes that were mapped at ptr
+ * @returns Nonzero when munmap() succeeded, zero otherwise
+ */
+static char vm_free(void *ptr, ptrdiff_t capacity) {
+	return munmap(ptr, (size_t)capacity) == 0;
+}
+#endif /* _WIN32 */
+
+/* STRUCT */
+
+struct vmap {
+	/* using uint8_t* here would simplify
+	 * code except for aliasing rules */
+	/** Start of the mapped memory - NULL when the mapping failed or was freed */
+	uint32_t *data;
+	/** Set to zero by create_vmap() and free_vmap() */
+	uint32_t count;
+	/** Number of levels memory is mapped for: 16 pages of 4096 bytes per level */
+	uint32_t max_levels;
+};
+typedef struct vmap vmap;
+
+/** The result of the search_all(..) operation */
+struct vmap_find_res {
+	/** The found location - or NULL when the key was not found */
+	uint32_t *value_location;
+	/** What 'level' depth this value was found. For multimap, but useful statistics */
+	uint32_t level;
+	/** Meta-data for continuation of the search. Tells which lane to search next A, B, C, or D */
+	uint32_t lane_abcd_next;
+	/** Meta-data for continuation of the search. In-lane where we search from next time? */
+	int lane_next_begin;
+};
+typedef struct vmap_find_res vmap_find_res;
+
+/* PRIVATE */
+
+/** How many bytes of memory belong to a map with the given number of levels */
+static inline ptrdiff_t vmap_reserved_bytes(uint32_t max_levels) {
+	/* Widen first: the product does not fit 32 bits from 65536 levels on */
+	return (ptrdiff_t)max_levels * 16 * 4096;
+}
+
+/* UINTAPI */
+
+/**
+ * Create a map and map the memory for all of its levels.
+ *
+ * @param max_levels The number of levels; each level takes 16 pages of 4096 bytes
+ * @returns The new map. Its data is NULL when the memory could not be mapped
+ */
+static inline vmap create_vmap(uint32_t max_levels) {
+	vmap map = { NULL, 0, max_levels };
+	map.data = (uint32_t *)vm_reserve(vmap_reserved_bytes(max_levels));
+	return map;
+}
+
+/**
+ * Unmap the memory of the map.
+ *
+ * @param map The map to free
+ * @returns Nonzero when the memory was unmapped, zero otherwise
+ */
+static inline char free_vmap(vmap *map) {
+	map->count = 0;
+	char ok = vm_free(map->data, vmap_reserved_bytes(map->max_levels));
+	/* Do not keep a dangling pointer around after a successful unmap */
+	if(ok) map->data = NULL;
+	return ok;
+}
+
+/** Create the value for starting a search_all call */
+static inline vmap_find_res vmap_search_all_begin(void) {
+	vmap_find_res ret;
+	ret.value_location = NULL;
+	ret.level = 0;
+	ret.lane_abcd_next = 0;
+	ret.lane_next_begin = 0;
+	return ret;
+}
+
+/**
+ * Search the map as a multimap - that is you can search multiple equal keyed values.
+ * This is implemented by the result being understood also as a continuation alongside
+ * a way to grab the pointer to the stored value and key (simd_map_lane_key_location(val)).
+ *
+ * Only lane A of each level is searched so far, always from its first slot: the
+ * lane_abcd_next and lane_next_begin fields of prev are passed through unused.
+ *
+ * @param map The map to search in
+ * @param key The key to search for
+ * @param prev The previous result if you continue your search. See: vmap_search_all_begin()
+ * @returns Metadata + nullable ptr. See: vmap_find_res struct comments; ret.value_location
+ */
+static inline vmap_find_res search_all_vmap(vmap *map, uint32_t key, vmap_find_res prev) {
+	/* Inits as not found: keeps the meta-data of prev, but not its old hit */
+	vmap_find_res ret = prev;
+	ret.value_location = NULL;
+
+	/* There is nothing to search when mapping the memory has failed */
+	if(!map->data) return ret;
+
+	uint32_t level = prev.level;
+	/* Valid levels are 0..max_levels-1 as only that many are mapped. */
+	/* Probably the loop always exits without this predicate being false */
+	while(level < map->max_levels) {
+		/* Process 8 bits of the 32-bit key in circular order - so its not radix, but similar */
+		uint32_t byt = level % 4;
+		uint32_t key_byte = (key >> (byt * 8)) & 255;
+		/* Low 4 bits: page */
+		size_t page_no = (size_t)level * 16 + (key_byte & 15);
+		/* 1024 and not 4096 here because of uint32_t *data offset: 4096 / 4 uint32s */
+		size_t page_offset = 1024 * page_no;
+
+		/* Top 4 bits: lane group. There are 16 group start positions in the 4k page */
+		uint32_t lane_no = key_byte >> 4;
+		/* 4096 / 4 == 1024 elements, which then divided by 16 == 64 uint32_t elems */
+		size_t lane_offset = (size_t)lane_no * 64;
+
+// FIXME: Rethink what is needed for continuations!
+// I think we should store A, B, C and D lane retvals plus where we are
+// or maybe just the "where we are" and figure out with logic here,
+// but maybe I need to just save flags (4x1 bytes) for "does lane-ABCD search needed?" as that is faster to simd branch pred?
+
+		/* A lane has 8x32 bit keys, then 8x32 bit values. 16 uint32_t elems. */
+		/* So grab the A, B, C and D candidate lanes for each lane_offset. */
+		/* The offsets count uint32_t elems, so add them before the cast to lanes */
+		simd_map_lane *lane_a = (simd_map_lane *)(map->data + page_offset + lane_offset);
+		simd_map_lane *lane_b = lane_a + 1;
+		simd_map_lane *lane_c = lane_b + 1;
+		simd_map_lane *lane_d = lane_c + 1;
+
+		uint32_t *afind = simd_map_lane_find(
+				lane_a,
+				key,
+				0, /* lane modulo: 0 means until lane end */
+				0, /* FIXME - from continuation! */
+				NULL); /* FIXME - we should fill a *lane_next_begin ptr here */
+		if(afind) {
+			/* Report the first match and remember its level for continuation */
+			ret.value_location = afind;
+			ret.level = level;
+			return ret;
+		}
+		/* TODO: Implement B, C and D. Further lanes only needed if the one */
+		/* before is fully filled: simd_map_lane_last_value(lane_a) for B, etc. */
+
+		/* Check if we need to jump to the next level and do: */
+		/* only when the last lane of this group is filled, too */
+		uint32_t more = simd_map_lane_last_value(lane_d);
+		if(!more) return ret;
+		++level;
+	}
+
+	return ret;
+}
+
+/**
+ * Try to search the map for the given key.
+ *
+ * @param map The map to search in
+ * @param key The key to search for
+ * @returns NULL if there is no value stored, otherwise ptr to first match with the given key.
+ */
+static inline uint32_t *search_vmap(vmap *map, uint32_t key) {
+	vmap_find_res res = search_all_vmap(map, key, vmap_search_all_begin());
+	return res.value_location;
+}
+
+#endif /* VMAP_H */