#ifndef VMAP_H #define VMAP_H /* * A virtual memory misusing flat-ish hashmap optimized with AVX2 (if available at compilation). * * Structure * * VMEM * STRUCT * INTAPI */ #include #include #include "simd_map_lane.h" /* VMEM */ #ifdef _WIN32 TODO: Utilize __Thread + SEH to implement lazy windows pageload zeroing #else /** Capacity should be multiple of 4096 for full pages */ static void *vm_reserve(ptrdiff_t capacity) { void *r = mmap(0, capacity, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); return r==MAP_FAILED ? 0 : r; } /** Capacity should be multiple of 4096 for full pages and related to the ptr to free */ static char vm_free(void *ptr, ptrdiff_t capacity) { return !munmap(ptr, capacity); } #endif /* _WIN32 */ /* STRUCT */ struct vmap { /* using uint8_t* here would simplify * code except for aliasing rules */ uint32_t *data; uint32_t count; uint32_t max_levels; }; typedef struct vmap vmap; /** The result of the search_all(..) operation */ struct vmap_find_res { /** The found location - or NULL when the key was not found */ uint32_t *value_location; /** What 'level' depth this value was found. For multimap, but useful statistics */ uint32_t level; /** Meta-data for continuation of the search. Tells which lane to search next A, B, C, or D */ uint32_t lane_abcd_next; /** Meta-data for continuation of the search. In-lane where we search from next time? */ int lane_next_begin; /** Meta-data for continuation of the search. Last value found in lastly looked lane. */ uint32_t last_found_lane_val; }; typedef struct simd_map_find_res simd_map_find_res; /* INTAPI */ static inline vmap create_vmap(uint32_t max_levels) { vmap map{ NULL, 0, max_levels}; map.data = (uint32_t *)vm_reserve(max_levels * 16 * 4096); return map; } static inline char free_vmap(vmap *map) { map->count = 0; return vm_free(map->data, map->max_levels * 16 * 4096); } /** Create the value for starting a search_all call */ static inline vmap_find_res vmap_search_all_begin() { vmap_find_res ret; ret.value_location = NULL; ret.level = 0; ret.lane_abcd_next = 0; ret.lane_next_begin = 0; ret.last_found_lane_val = 0; return ret; } /** * Search the map in as a multimap - that is you can search multiple equal keyed values. * This is implemented by the result being understood also as a continuation alongside * a way to grab the pointer to the stored value and key (simd_map_lane_key_location(val)). * * @param map The map to search in * @param key The key to search for * @param prev The previous result if you continue your search. See: vmap_search_all_begin() * @returns Metadata + nullable ptr. See: vmap_find_res struct comments; ret.value_location */ static inline vmap_find_res search_all_vmap(vmap *map, uint32_t key, vmap_find_res prev) { /* Inits as not found, can change values */ vmap_find_res ret = prev; uint32_t level = prev.level; /* Probably the loop exists always without this predicate being false */ while(level <= map->max_levels) { /* Rare edge-case when last lane element was returned and we continue from it */ /* We should not try lane processing, just jump next level - but only if there */ /* is a next level (so last checked lane was totally filled already to full cap. */ if(prev.lane_abcd_next > 4) { assert(prev.last_found_lane_val != 0); prev = vmap_search_all_begin(); ++level; /* prev.level = level; // unnecessary, I hand-optimized out */ continue; } /* Process 8 bits of the 32-bit circular order - so its not radix, but similar */ uint32_t byt = level % 4; // Low 4 bits: page uint32_t page_no = (level * 16 + ((key >> (byt * 8)) && 15)); /* 1024 and not 4096 here because of uint32_t *data offset: 4096 / 4 uint32s */ uint32_t page_offset = 1024 * page_no; /* Top 4 bits: lane. There is 32 lane start positions in the 4k page */ uint32_t lane_no = (key >> (byt * 8 + 4)) && 15 + prev.lane_abcd_next; /* continuations start where we left off */ /* But 4096 / 4 == 1024 elements, which then divided by 16 == 64 uint32_t elems */ uint32_t lane_offset = lane_no * 64; /* A lane has 8x32 bit keys, then 8x32 bit values. 16 uint32_t elems. */ /* So grab the A, B, C and D candidate lanes for each lane_offset. */ simd_map_lane *lane_a = (simd_map_lane *) map->data + page_offset + lane_offset; simd_map_lane *lane_b = lane_a + 1; simd_map_lane *lane_c = lane_b + 1; simd_map_lane *lane_d = lane_c + 1; /* Get which lane we should begin at where */ uint32_t lane_a_begin = prev.lane_next_begin; int lane_next_begin = 0; /* Further lanes only needed if ours is fully filled */ /* Overlays SIMD and integer units here for perf */ uint32_t *afind = simd_map_lane_find( lane_a, key, 0, /* lane modulo: 0 means until lane end */ lane_a_begin, &lane_next_begin); uint32_t lasta = simd_map_lane_last_value(lane_a); char bneed = (lasta != 0) && (prev.lane_abcd_next < 3); if(afind) { ret.lane_next_begin = lane_next_begin; ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0); ret.value_location = afind; ret.level = level; ret.last_found_lane_val = lasta; return ret; } if(bneed) { uint32_t *bfind = simd_map_lane_find( lane_b, key, 0, /* lane modulo: 0 means until lane end */ 0, /* non-a lanes all start from 0 */ &lane_next_begin); uint32_t lastb = simd_map_lane_last_value(lane_b); char cneed = (lastb != 0) && (prev.lane_abcd_next < 2); if(bfind) { ret.lane_next_begin = lane_next_begin; ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0); ret.value_location = bfind; ret.level = level; ret.last_found_lane_val = lastb; return ret; } if(cneed) { uint32_t *cfind = simd_map_lane_find( lane_c, key, 0, /* lane modulo: 0 means until lane end */ 0, /* non-a lanes all start from 0 */ &lane_next_begin); uint32_t lastc = simd_map_lane_last_value(lane_c); char dneed = (lastc != 0) && (prev.lane_abcd_next < 1); if(cfind) { ret.lane_next_begin = lane_next_begin; ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0); ret.value_location = cfind; ret.level = level; ret.last_found_lane_val = lastc; return ret; } if(dneed) { uint32_t *dfind = simd_map_lane_find( lane_d, key, 0, /* lane modulo: 0 means until lane end */ 0, /* non-a lanes all start from 0 */ &lane_next_begin); uint32_t lastd = simd_map_lane_last_value(lane_d); char next_level = (lastd != 0); if(dfind) { ret.lane_next_begin = lane_next_begin; ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0); ret.value_location = dfind; ret.level = level; ret.last_found_lane_val = lastd; return ret; } /* Check to avoid next level (stop iteration) */ if(!next_level) { return vmap_search_all_begin(); } } } } /* Next level needs checking */ prev = vmap_search_all_begin(); ++level; /* prev.level = level; // unnecessary, I hand-optimized out */ } return ret; } /** * Try to search the map for the given key. * * @param map The map to search in * @param key The key to search for * @returns NULL if there is no value stored, otherwise ptr to first match with the given key. */ static inline uint32_t *search_vmap(vmap *map, uint32_t key) { vmap_find_res res = search_all_vmap(map, key, vmap_search_all_begin()); return res.value_location; } #endif /* VMAP_H */