#ifndef SIMD_MAP #define SIMD_MAP #include /* NULL */ #include /* uint32_t, ... */ #include #include "arena.h/arena.h" /* SIMD support */ #ifdef __AVX2__ #include #endif #ifdef __SSE2__ #include #endif /* I have no idea what MSVC has instead... */ #ifdef _MSC_VER #define SM_THREAD_LOCAL __declspec(thread) #define SM_PREFETCH(x) #define SM_LIKELY(x) #define SM_UNLIKELY(x) #define SM_NOINLINE __declspec(noinline) #define SM_ALWAYS_INLINE __forceinline #else #define SM_THREAD_LOCAL __thread #define SM_PREFETCH(x) __builtin_prefetch(x) #define SM_LIKELY(x) __builtin_expect((x),1) #define SM_UNLIKELY(x) __builtin_expect((x),0) #define SM_NOINLINE __attribute__ ((noinline)) #define SM_ALWAYS_INLINE __attribute__ ((always_inline)) #endif /* 32 byte = 256 bits = (8 * 32bit) optimized for AVX2 */ #define SM_LANE_SPAN 8 /** Grouped together keys-values to support SIMD more this way (also became a single cache line this way) */ struct simd_map_lane { uint32_t keys[SM_LANE_SPAN]; uint32_t values[SM_LANE_SPAN]; }; typedef struct simd_map_elem simd_map_elem; struct simd_map { arena a; simd_map_lane *lanes; uint32_t end; /* in lanes */ uint32_t usage_end; /* in lanes!!! */ int lane_modulo; /* [0..SM_LANE_SPAN) */ }; typedef struct simd_map simd_map; /** Create a simd map instance */ static inline SM_ALWAYS_INLINE simd_map simd_map_create() { simd_map ret; ret.a = newarena((ptrdiff_t)1 << 33); ret.end = 0; ret.usage_end = 0; ret.lanes = (simd_map_lane*)(((auint64*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(simd_map_lane), 1)) /* aligned! */ + 1); /* First really addressible thing */ ret.lane_modulo = 0; return ret; } /** Free all resources held by the map. Returns 0 on errors. */ static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) { return freearena(&(map->a)); } /** * Returns if this key is stored in the given map LANE or not - returns NULL if not found. * * @param map_lane The lane to find in. * @param key The key to search for. * @param lane_modulo When non-zero, the lane only searched by this many elements. Zero means all elements. (mod lane length) * @returns NULL when not found, otherwise pointer to the stored value for the key. */ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(simd_map_lane *map_lane, uint32_t key, int lane_modulo) { uint32_t *keys = map_lane->keys; uint32_t *values = map_lane->values; if(SM_UNLIKELY(lane_modulo)) goto non_simd_modulo; #ifdef __AVX2__ /* TODO */ #endif #ifdef __SSE2__ /* TODO */ #endif /* Regular integer code - should have good ILP and cache locality patterns anyways */ if(lane_modulo == 0) { /** Pretty hopeful this can get more easily inlined */ for(int i = 0; i < SM_LANE_SPAN; ++i) { if(SM_UNLIKELY(keys[i] == key)) { return &values[i]; } } return NULL; } else { non_simd_modulo: for(int i = 0; i < lane_modulo; ++i) { if(SM_UNLIKELY(keys[i] == key)) { return &values[i]; } } return NULL; } } /** Returns if this key is stored in the map or not - returns NULL if does not exists. */ static inline uint32_t *simd_map_find(simd_map *map, uint32_t key) { /* Do not process last element because of last incomplete lane */ if(map->usage_end > 0) for(uint32_t i = 0; i < map->usage_end - 1; ++i) { uint32_t *found = simd_map_lane_find(&(map->lanes[i]), key, 0); if(found) return found; } /* Process last lane - with a modulo lane */ if(map->usage_end > 0) { uint32_t *found = simd_map_lane_find( &(map->lanes[map->usage_end - 1]), key, map->lane_modulo); if(found) return found; } /* Not found */ return NULL; } /** Useful if you know the key have never been before added (faster)! Returns 0 on errors, otherwise 1. */ static inline char simd_map_force_insert(simd_map *map, uint32_t key, uint32_t value) { /* Handle storage growth needs. */ uint32_t storage_needed = (map->lane_modulo == 0) ? 1 : 0; if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) { void *allret = aralloc(&(map->a), sizeof(simd_map_lane)/* esize */, 1 /* align - should be sizeof(simd_map_lane) but should be aligned here as-is already! */, storage_needed); /* ecount */ /** Return early with error but no state changes if we cannot allocate! */ if(SM_UNLIKELY(!allret)) { return 0; } /* Administer end offset */ map->end += storage_needed; } /* Administer usage end offset, separate from end because erase / shrink ops possible */ map->usage_end += storage_needed; /* Always force-insert into the last lane at lane_modulo location */ simd_map_lane *lane = &(map->lanes[map->usage_end - 1]); lane->keys[map->lane_modulo] = key; lane->values[map->lane_modulo] = value; /* Update lane modulo */ map->lane_modulo = (map->lane_modulo + 1) % SM_LANE_SPAN; return 1; } /** Returns 0 on errors, otherwise 1 when added as new, 2 when already found got overwritten */ static inline char simd_map_set(simd_map *map, uint32_t key, uint32_t value) { uint32_t *found = simd_map_find(map, key); if(!found) { return simd_map_force_insert(map, key, value); } else { /* Overwrite already existing mapping */ *found = value; return 2; } } /** Empties the map - this does not free resources, just makes it reusable! */ static inline void simd_map_erase(simd_map *map) { map->usage_end = 0; map->lane_modulo = 0; } /** Remove the given key from the map so its not stored anymore. Returns 1 when found and removed, 0 otherwise. */ static inline int simd_map_remove(simd_map *map, uint32_t key) { assert(0); // TODO: Implement by swapping to end + shrink! } #endif