#ifndef VMAP_H #define VMAP_H /* * A virtual memory misusing flat-ish hashmap optimized with AVX2. * * Structure * * VMEM * STRUCT * PRIVATE * UINTAPI */ #include #include "simd_map_lane.h" /* VMEM */ #ifdef _WIN32 TODO: Utilize __Thread + SEH to implement lazy windows pageload zeroing #else /** Capacity should be multiple of 4096 for full pages */ static void *vm_reserve(ptrdiff_t capacity) { void *r = mmap(0, capacity, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); return r==MAP_FAILED ? 0 : r; } /** Capacity should be multiple of 4096 for full pages and related to the ptr to free */ static char vm_free(void *ptr, ptrdiff_t capacity) { return !munmap(ptr, capacity); } #endif /* _WIN32 */ /* STRUCT */ struct vmap { /* using uint8_t* here would simplify * code except for aliasing rules */ uint32_t *data; uint32_t count; uint32_t max_levels; }; typedef struct vmap vmap; /** The result of the search_all(..) operation */ struct vmap_find_res { /** The found location - or NULL when the key was not found */ uint32_t *value_location; /** What 'level' depth this value was found. For multimap, but useful statistics */ uint32_t level; /** Meta-data for continuation of the search. Tells which lane to search next A, B, C, or D */ uint32_t lane_abcd_next; /** Meta-data for continuation of the search. In-lane where we search from next time? */ int lane_next_begin; }; typedef struct simd_map_find_res simd_map_find_res; /* PRIVATE */ /* UINTAPI */ static inline vmap create_vmap(uint32_t max_levels) { vmap map{ NULL, 0, max_levels}; map.data = (uint32_t *)vm_reserve(max_levels * 16 * 4096); return map; } static inline char free_vmap(vmap *map) { map->count = 0; return vm_free(map->data, map->max_levels * 16 * 4096); } /** Create the value for starting a search_all call */ static inline vmap_find_res vmap_search_all_begin() { vmap_find_res ret; ret.value_location = NULL; ret.level = 0; ret.lane_abcd_next = 0; ret.lane_next_begin = 0; return ret; } /** * Search the map in as a multimap - that is you can search multiple equal keyed values. * This is implemented by the result being understood also as a continuation alongside * a way to grab the pointer to the stored value and key (simd_map_lane_key_location(val)). * * @param map The map to search in * @param key The key to search for * @param prev The previous result if you continue your search. See: vmap_search_all_begin() * @returns Metadata + nullable ptr. See: vmap_find_res struct comments; ret.value_location */ static inline vmap_find_res search_all_vmap(vmap *map, uint32_t key, vmap_find_res prev) { /* Inits as not found, can change values */ vmap_find_res ret = prev; uint32_t level = prev.level; /* Probably the loop exists always without this predicate being false */ while(level <= map->max_levels) { /* Process 8 bits of the 32-bit circular order - so its not radix, but similar */ uint32_t byt = level % 4; // Low 4 bits: page uint32_t page_no = (level * 16 + ((key >> (byt * 8)) && 15)); /* 1024 and not 4096 here because of uint32_t *data offset: 4096 / 4 uint32s */ uint32_t page_offset = 1024 * page_no; /* Top 4 bits: lane. There is 32 lane start positions in the 4k page */ uint32_t lane_no = (key >> (byt * 8 + 4)) && 15; /* But 4096 / 4 == 1024 elements, which then divided by 16 == 64 uint32_t elems */ uint32_t lane_offset = lane_no * 64; // FIXME: Rerhink what is needed for continuations! // I think we should store A, B, C and D lane retvals plus where we are // or maybe just the "where we are" and figure out with logic here, // but maybe I need to just save flags (4x1 bytes) for "does lane-ABCD search needed?" as that is faster to simd branch pred? /* A lane has 8x32 bit keys, then 8x32 bit values. 16 uint32_t elems. */ /* So grab the A, B, C and D candidate lanes for each lane_offset. */ simd_map_lane *lane_a = (simd_map_lane *) map->data + page_offset + lane_offset; simd_map_lane *lane_b = lane_a + 1; simd_map_lane *lane_c = lane_b + 1; simd_map_lane *lane_d = lane_c + 1; /* Further lanes only needed if ours is fully filled */ /* Overlay simd and integer units here for perf */ uint32_t *afind = simd_map_lane_find( lane_a, key, 0, /* lane modulo: 0 means until lane end */ 0, /* FIXME - from continuation! */ NULL); /* FIXME - we should fill a *lane_next_begin ptr here */ uint32_t bneed = simd_map_lane_last_value(lane_a); if(afind) { ret.value_location = afind; ret.level = level; } /* TODO: Implement B, C and D */ uint32_t cneed = simd_map_lane_last_value(lane_b); uint32_t dneed = simd_map_lane_last_value(lane_c); /* Check if we need to jump to the next level and do */ uint32_t more = simd_map_lane_last_value(lane_c); if(!more) return ret; ++level; } return ret; } /** * Try to search the map for the given key. * * @param map The map to search in * @param key The key to search for * @returns NULL if there is no value stored, otherwise ptr to first match with the given key. */ static inline uint32_t *search_vmap(vmap *map, uint32_t key) { vmap_find_res res = search_all_vmap(map, key, vmap_search_all_begin()); return res.value_location; } #endif /* VMAP_H */