simap/simd_map.h
2024-10-23 00:45:33 +02:00

336 lines
10 KiB
C

#ifndef SIMD_MAP
#define SIMD_MAP
#include <stddef.h> /* NULL */
#include <stdint.h> /* uint32_t, ... */
#include <assert.h>
#include "arena.h/arena.h"
/* SIMD support */
#ifdef __AVX2__
#include <immintrin.h>
#endif
#ifdef __SSE2__
#include <immintrin.h>
#endif
/* I have no idea what MSVC has instead... */
#ifdef _MSC_VER
#define SM_THREAD_LOCAL __declspec(thread)
#define SM_PREFETCH(x)
#define SM_LIKELY(x)
#define SM_UNLIKELY(x)
#define SM_NOINLINE __declspec(noinline)
#define SM_ALWAYS_INLINE __forceinline
#else
#define SM_THREAD_LOCAL __thread
#define SM_PREFETCH(x) __builtin_prefetch(x)
#define SM_LIKELY(x) __builtin_expect((x),1)
#define SM_UNLIKELY(x) __builtin_expect((x),0)
#define SM_NOINLINE __attribute__ ((noinline))
#define SM_ALWAYS_INLINE __attribute__ ((always_inline))
#endif
/* 32 byte = 256 bits = (8 * 32bit) optimized for AVX2 */
#define SM_LANE_SPAN 8
/** Grouped together keys-values to support SIMD more this way (also became a single cache line this way) */
struct simd_map_lane {
uint32_t keys[SM_LANE_SPAN];
uint32_t values[SM_LANE_SPAN];
};
typedef struct simd_map_elem simd_map_elem;
struct simd_map {
arena a;
simd_map_lane *lanes;
uint32_t end; /* in lanes */
uint32_t usage_end; /* in lanes!!! */
int lane_modulo; /* [0..SM_LANE_SPAN) */
};
typedef struct simd_map simd_map;
/** Create a simd map instance */
static inline SM_ALWAYS_INLINE simd_map simd_map_create() {
simd_map ret;
ret.a = newarena((ptrdiff_t)1 << 33);
ret.end = 0;
ret.usage_end = 0;
ret.lanes = (simd_map_lane*)(((auint64*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(simd_map_lane), 1)) /* aligned! */
+ 1); /* First really addressible thing */
ret.lane_modulo = 0;
return ret;
}
/** Free all resources held by the map. Returns 0 on errors. */
static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) {
return freearena(&(map->a));
}
/**
* Returns if this key is stored in the given map LANE or not - returns NULL if not found.
*
* Rem.: The lane_begin and lane_next_begin parameters are used for reentrant multisearch.
*
* @param map_lane The lane to find in.
* @param key The key to search for.
* @param lane_modulo When non-zero, the lane only searched until this index. Zero means all remaining elements. (mod lane length)
* @param lane_begin The lane is searched from this location(find_all). If full lane / lane prefix is needed, this should be 0.
* @param lane_next_begin This pointer will be filled on non-NULL retvals with the incremented in-lane index (with % modulus).
* @returns NULL when not found, otherwise pointer to the stored value for the key.
*/
static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
simd_map_lane *map_lane,
uint32_t key,
int lane_modulo,
int lane_begin,
int *lane_next_begin) {
uint32_t *keys = map_lane->keys;
uint32_t *values = map_lane->values;
if(SM_UNLIKELY(lane_modulo)) goto non_simd_modulo;
#ifdef __AVX2__
/* TODO */
#endif
#ifdef __SSE2__
/* TODO */
#endif
/* Regular integer code - should have good ILP and cache locality patterns anyways */
if(lane_modulo == 0) {
/** Pretty hopeful this can get more easily unrolled / autovectorized */
for(int i = lane_begin; i < SM_LANE_SPAN; ++i) {
if(SM_UNLIKELY(keys[i] == key)) {
uint32_t *ptr = &values[i];
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
return ptr;
}
}
return NULL;
} else {
non_simd_modulo:
for(int i = lane_begin; i < lane_modulo; ++i) {
if(SM_UNLIKELY(keys[i] == key)) {
uint32_t *ptr = &values[i];
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
return ptr;
}
}
return NULL;
}
}
/** The result of the find_all(..) operation */
struct simd_map_find_res {
/** The found location - or NULL when the key was not found */
uint32_t *value_location;
/** Meta-data for continuation of the search */
uint32_t lane_next;
/** Meta-data for continuation of the search */
int lane_next_begin;
};
typedef struct simd_map_find_res simd_map_find_res;
/** Create the value for starting a find_all call */
static inline simd_map_find_res simd_map_find_all_begin() {
simd_map_find_res ret;
ret.value_location = NULL;
ret.lane_next = 0;
ret.lane_next_begin = 0;
return ret;
}
/**
* Useful for multimap-like operations to find multiple mappings for the same key.
*
* @param map The map
* @param key The key to search
* @param prev The previous result - or simd_map_find_all_begin() to find the first / start lookup!
* @returns The found pointer / location (if any) and if location was non-NULL meta-data so we can search further same-keys!
*/
static inline SM_ALWAYS_INLINE simd_map_find_res simd_map_find_all(simd_map *map, uint32_t key, simd_map_find_res prev) {
simd_map_find_res ret;
/* Process most lanes */
/* Do not process last element (-1) because of last incomplete lane */
if(map->usage_end > 0) for(uint32_t i = prev.lane_next; i < map->usage_end - 1; ++i) {
uint32_t *found = simd_map_lane_find(
&(map->lanes[i]),
key,
0,
prev.lane_next_begin,
&(ret.lane_next_begin)); /* XXX: Fills part of retval! */
/* Needed so only the currently found are ignored in find_all(..) */
prev.lane_next_begin = 0;
if(found) {
ret.value_location = found;
ret.lane_next = i + (ret.lane_next_begin != 0);
return ret;
}
}
/* Process last lane - with a modulo lane */
if((map->usage_end > 0) && (prev.lane_next < map->usage_end)) {
uint32_t *found = simd_map_lane_find(
&(map->lanes[map->usage_end - 1]),
key,
map->lane_modulo,
prev.lane_next_begin,
&(ret.lane_next_begin)); /* XXX: Fills part of retval! */
/* Needed so only the currently found are ignored in find_all(..) */
prev.lane_next_begin = 0;
if(found) {
ret.value_location = found;
ret.lane_next = (map->usage_end - 1) + (ret.lane_next_begin != 0);
return ret;
}
}
/* Not found */
ret = simd_map_find_all_begin();
return ret;
}
/** Returns if this key is stored in the map or not - returns NULL if does not exists. */
static inline uint32_t *simd_map_find(simd_map *map, uint32_t key) {
simd_map_find_res begin = simd_map_find_all_begin();
simd_map_find_res fires = simd_map_find_all(map, key, begin);
return fires.value_location;
}
/**
* Insert without checking that the value have been already added or not.
*
* Useful for multimap operation or if you know the key have never been before added (faster)!
*
* @param map The map
* @param key The key to insert
* @param value The value for this key to insert
* @returns 0 on errors, otherwise 1.
*/
static inline SM_ALWAYS_INLINE char simd_map_multi_set(simd_map *map, uint32_t key, uint32_t value) {
/* Handle storage growth needs. */
uint32_t storage_needed = (map->lane_modulo == 0) ? 1 : 0;
if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) {
void *allret = aralloc(&(map->a),
sizeof(simd_map_lane)/* esize */,
1 /* align - should be sizeof(simd_map_lane) but should be aligned here as-is already! */,
storage_needed); /* ecount */
/** Return early with error but no state changes if we cannot allocate! */
if(SM_UNLIKELY(!allret)) {
return 0;
}
/* Administer end offset */
map->end += storage_needed;
}
/* Administer usage end offset, separate from end because erase / shrink ops possible */
map->usage_end += storage_needed;
/* Always force-insert into the last lane at lane_modulo location */
simd_map_lane *lane = &(map->lanes[map->usage_end - 1]);
lane->keys[map->lane_modulo] = key;
lane->values[map->lane_modulo] = value;
/* Update lane modulo */
map->lane_modulo = (map->lane_modulo + 1) % SM_LANE_SPAN;
return 1;
}
/**
* Returns 0 on errors, otherwise 1 when added as new, 2 when already found got overwritten
*/
static inline char simd_map_set(simd_map *map, uint32_t key, uint32_t value) {
uint32_t *found = simd_map_find(map, key);
if(!found) {
return simd_map_multi_set(map, key, value);
} else {
/* Overwrite already existing mapping */
*found = value;
return 2;
}
}
/** Empties the map - this does not free resources, just makes it reusable! */
static inline SM_ALWAYS_INLINE void simd_map_erase(simd_map *map) {
map->usage_end = 0;
map->lane_modulo = 0;
}
/** Returns count of elements in the given simd_map */
static inline SM_ALWAYS_INLINE size_t simd_map_size(simd_map *map) {
return (map->usage_end > 0) ?
(((size_t)(map->usage_end) - 1) * 8 + map->lane_modulo) :
0;
}
/** Returns TRUE when map is empty and false otherwise - faster than simd_map_size(..) */
static inline SM_ALWAYS_INLINE char simd_map_is_empty(simd_map *map) {
return (map->usage_end == 0);
}
/** Returns the key location for a given value location */
static inline SM_ALWAYS_INLINE uint32_t *simd_map_key_location(uint32_t *value_location) {
return value_location -= SM_LANE_SPAN;
}
/** Returns the lastly inserted value's location in the map - you must ensure the map has elements! */
static inline SM_ALWAYS_INLINE uint32_t *simd_map_last_location(simd_map *map) {
return (map->lane_modulo > 0) ?
&(map->lanes[map->usage_end - 1].values[map->lane_modulo - 1]) :
&(map->lanes[map->usage_end - 1].values[SM_LANE_SPAN - 1]);
}
/**
* Removes the found location from the map.
*
* Most users should prefer simd_map_remove instead. This is an unchecked operation!
*
* This must be called right after a find(..) or find_all(..) operation,
* because the pointer can get invalidated (for example by erase or remove).
*
* @param map The map to remove from
* @param value_location The location returned by find(..) or find_all(..) and is not yet invalidated
*/
static inline SM_ALWAYS_INLINE void simd_map_remove_ptr(simd_map *map, uint32_t *value_location) {
/* Overwrite with the last key-value */
uint32_t *key_location = simd_map_key_location(value_location);
uint32_t *last_value_location = simd_map_last_location(map);
uint32_t *last_key_location = simd_map_key_location(last_value_location);
*value_location = *last_value_location;
*key_location = *last_key_location;
/* Shrink the data structure */
if(map->lane_modulo > 0) {
--(map->lane_modulo);
} else {
map->lane_modulo = SM_LANE_SPAN - 1;
}
}
/** Remove the given key from the map so its not stored anymore. Returns 1 when found and removed, 0 otherwise. */
static inline int simd_map_remove(simd_map *map, uint32_t key) {
if(SM_UNLIKELY(map->usage_end == 0)) return 0;
uint32_t *found = simd_map_find(map, key);
if(found) {
simd_map_remove_ptr(map, found);
return 1;
}
return 0;
}
#endif