#ifndef SIMD_MAP #define SIMD_MAP #include /* NULL */ #include /* uint32_t, ... */ #include #include "arena.h/arena.h" /* SIMD support */ #ifdef __AVX2__ #include #endif #ifdef __SSE2__ #include #endif /* I have no idea what MSVC has instead... */ #ifdef _MSC_VER #define SM_THREAD_LOCAL __declspec(thread) #define SM_PREFETCH(x) #define SM_LIKELY(x) #define SM_UNLIKELY(x) #define SM_NOINLINE __declspec(noinline) #define SM_ALWAYS_INLINE __forceinline #else #define SM_THREAD_LOCAL __thread #define SM_PREFETCH(x) __builtin_prefetch(x) #define SM_LIKELY(x) __builtin_expect((x),1) #define SM_UNLIKELY(x) __builtin_expect((x),0) #define SM_NOINLINE __attribute__ ((noinline)) #define SM_ALWAYS_INLINE __attribute__ ((always_inline)) #endif /* 32 byte = 256 bits = (8 * 32bit) optimized for AVX2 */ #define SM_LANE_SPAN 8 /** Grouped together keys-values to support SIMD more this way (also became a single cache line this way) */ struct simd_map_lane { uint32_t keys[SM_LANE_SPAN]; uint32_t values[SM_LANE_SPAN]; }; typedef struct simd_map_elem simd_map_elem; struct simd_map { arena a; simd_map_lane *lanes; uint32_t end; /* in lanes */ uint32_t usage_end; /* in lanes!!! */ int lane_modulo; /* [0..SM_LANE_SPAN) */ }; typedef struct simd_map simd_map; /** Create a simd map instance */ static inline SM_ALWAYS_INLINE simd_map simd_map_create() { simd_map ret; ret.a = newarena((ptrdiff_t)1 << 33); ret.end = 0; ret.usage_end = 0; ret.lanes = (simd_map_lane*)(((auint64*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(simd_map_lane), 1)) /* aligned! */ + 1); /* First really addressible thing */ ret.lane_modulo = 0; return ret; } /** Free all resources held by the map. Returns 0 on errors. */ static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) { return freearena(&(map->a)); } /** * Returns if this key is stored in the given map LANE or not - returns NULL if not found. * * Rem.: The lane_begin and lane_next_begin parameters are used for reentrant multisearch. * * @param map_lane The lane to find in. * @param key The key to search for. * @param lane_modulo When non-zero, the lane only searched until this index. Zero means all remaining elements. (mod lane length) * @param lane_begin The lane is searched from this location(find_all). If full lane / lane prefix is needed, this should be 0. * @param lane_next_begin This pointer will be filled on non-NULL retvals with the incremented in-lane index (with % modulus). * @returns NULL when not found, otherwise pointer to the stored value for the key. */ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find( simd_map_lane *map_lane, uint32_t key, int lane_modulo, int lane_begin, int *lane_next_begin) { uint32_t *keys = map_lane->keys; uint32_t *values = map_lane->values; if(SM_UNLIKELY(lane_modulo)) goto non_simd_modulo; #ifdef __AVX2__ /* TODO */ #endif #ifdef __SSE2__ /* TODO */ #endif /* Regular integer code - should have good ILP and cache locality patterns anyways */ if(lane_modulo == 0) { /** Pretty hopeful this can get more easily unrolled / autovectorized */ for(int i = lane_begin; i < SM_LANE_SPAN; ++i) { if(SM_UNLIKELY(keys[i] == key)) { uint32_t *ptr = &values[i]; *lane_next_begin = (i + 1) % SM_LANE_SPAN; return ptr; } } return NULL; } else { non_simd_modulo: for(int i = lane_begin; i < lane_modulo; ++i) { if(SM_UNLIKELY(keys[i] == key)) { uint32_t *ptr = &values[i]; *lane_next_begin = (i + 1) % SM_LANE_SPAN; return ptr; } } return NULL; } } /** The result of the find_all(..) operation */ struct simd_map_find_res { /** The found location - or NULL when the key was not found */ uint32_t *value_location; /** Meta-data for continuation of the search */ uint32_t lane_next; /** Meta-data for continuation of the search */ int lane_next_begin; }; typedef struct simd_map_find_res simd_map_find_res; /** Create the value for starting a find_all call */ static inline simd_map_find_res simd_map_find_all_begin() { simd_map_find_res ret; ret.value_location = NULL; ret.lane_next = 0; ret.lane_next_begin = 0; return ret; } /** * Useful for multimap-like operations to find multiple mappings for the same key. * * @param map The map * @param key The key to search * @param prev The previous result - or simd_map_find_all_begin() to find the first / start lookup! * @returns The found pointer / location (if any) and if location was non-NULL meta-data so we can search further same-keys! */ static inline SM_ALWAYS_INLINE simd_map_find_res simd_map_find_all(simd_map *map, uint32_t key, simd_map_find_res prev) { simd_map_find_res ret; /* Process most lanes */ /* Do not process last element (-1) because of last incomplete lane */ if(map->usage_end > 0) for(uint32_t i = prev.lane_next; i < map->usage_end - 1; ++i) { uint32_t *found = simd_map_lane_find( &(map->lanes[i]), key, 0, prev.lane_next_begin, &(ret.lane_next_begin)); /* XXX: Fills part of retval! */ /* Needed so only the currently found are ignored in find_all(..) */ prev.lane_next_begin = 0; if(found) { ret.value_location = found; ret.lane_next = i + (ret.lane_next_begin != 0); return ret; } } /* Process last lane - with a modulo lane */ if((map->usage_end > 0) && (prev.lane_next < map->usage_end)) { uint32_t *found = simd_map_lane_find( &(map->lanes[map->usage_end - 1]), key, map->lane_modulo, prev.lane_next_begin, &(ret.lane_next_begin)); /* XXX: Fills part of retval! */ /* Needed so only the currently found are ignored in find_all(..) */ prev.lane_next_begin = 0; if(found) { ret.value_location = found; ret.lane_next = (map->usage_end - 1) + (ret.lane_next_begin != 0); return ret; } } /* Not found */ ret = simd_map_find_all_begin(); return ret; } /** Returns if this key is stored in the map or not - returns NULL if does not exists. */ static inline uint32_t *simd_map_find(simd_map *map, uint32_t key) { simd_map_find_res begin = simd_map_find_all_begin(); simd_map_find_res fires = simd_map_find_all(map, key, begin); return fires.value_location; } /** * Insert without checking that the value have been already added or not. * * Useful for multimap operation or if you know the key have never been before added (faster)! * * @param map The map * @param key The key to insert * @param value The value for this key to insert * @returns 0 on errors, otherwise 1. */ static inline SM_ALWAYS_INLINE char simd_map_multi_set(simd_map *map, uint32_t key, uint32_t value) { /* Handle storage growth needs. */ uint32_t storage_needed = (map->lane_modulo == 0) ? 1 : 0; if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) { void *allret = aralloc(&(map->a), sizeof(simd_map_lane)/* esize */, 1 /* align - should be sizeof(simd_map_lane) but should be aligned here as-is already! */, storage_needed); /* ecount */ /** Return early with error but no state changes if we cannot allocate! */ if(SM_UNLIKELY(!allret)) { return 0; } /* Administer end offset */ map->end += storage_needed; } /* Administer usage end offset, separate from end because erase / shrink ops possible */ map->usage_end += storage_needed; /* Always force-insert into the last lane at lane_modulo location */ simd_map_lane *lane = &(map->lanes[map->usage_end - 1]); lane->keys[map->lane_modulo] = key; lane->values[map->lane_modulo] = value; /* Update lane modulo */ map->lane_modulo = (map->lane_modulo + 1) % SM_LANE_SPAN; return 1; } /** * Returns 0 on errors, otherwise 1 when added as new, 2 when already found got overwritten */ static inline char simd_map_set(simd_map *map, uint32_t key, uint32_t value) { uint32_t *found = simd_map_find(map, key); if(!found) { return simd_map_multi_set(map, key, value); } else { /* Overwrite already existing mapping */ *found = value; return 2; } } /** Empties the map - this does not free resources, just makes it reusable! */ static inline SM_ALWAYS_INLINE void simd_map_erase(simd_map *map) { map->usage_end = 0; map->lane_modulo = 0; } /** Returns count of elements in the given simd_map */ static inline SM_ALWAYS_INLINE size_t simd_map_size(simd_map *map) { return (map->usage_end > 0) ? (((size_t)(map->usage_end) - 1) * 8 + map->lane_modulo) : 0; } /** Returns TRUE when map is empty and false otherwise - faster than simd_map_size(..) */ static inline SM_ALWAYS_INLINE char simd_map_is_empty(simd_map *map) { return (map->usage_end == 0); } /** Returns the key location for a given value location */ static inline SM_ALWAYS_INLINE uint32_t *simd_map_key_location(uint32_t *value_location) { return value_location -= SM_LANE_SPAN; } /** Returns the lastly inserted value's location in the map - you must ensure the map has elements! */ static inline SM_ALWAYS_INLINE uint32_t *simd_map_last_location(simd_map *map) { return (map->lane_modulo > 0) ? &(map->lanes[map->usage_end - 1].values[map->lane_modulo - 1]) : &(map->lanes[map->usage_end - 1].values[SM_LANE_SPAN - 1]); } /** * Removes the found location from the map. * * Most users should prefer simd_map_remove instead. This is an unchecked operation! * * This must be called right after a find(..) or find_all(..) operation, * because the pointer can get invalidated (for example by erase or remove). * * @param map The map to remove from * @param value_location The location returned by find(..) or find_all(..) and is not yet invalidated */ static inline SM_ALWAYS_INLINE void simd_map_remove_ptr(simd_map *map, uint32_t *value_location) { /* Overwrite with the last key-value */ uint32_t *key_location = simd_map_key_location(value_location); uint32_t *last_value_location = simd_map_last_location(map); uint32_t *last_key_location = simd_map_key_location(last_value_location); *value_location = *last_value_location; *key_location = *last_key_location; /* Shrink the data structure */ if(map->lane_modulo > 0) { --(map->lane_modulo); } else { map->lane_modulo = SM_LANE_SPAN - 1; } } /** Remove the given key from the map so its not stored anymore. Returns 1 when found and removed, 0 otherwise. */ static inline int simd_map_remove(simd_map *map, uint32_t key) { if(SM_UNLIKELY(map->usage_end == 0)) return 0; uint32_t *found = simd_map_find(map, key); if(found) { simd_map_remove_ptr(map, found); return 1; } return 0; } #endif