261 lines
7.7 KiB
C
261 lines
7.7 KiB
C
#ifndef SIMAP_H
|
|
#define SIMAP_H
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include "amap.h"
|
|
#include "arena.h/arena.h"
|
|
|
|
/**
 * Storage slot for a stored value pointer.
 *
 * The pointer is overlaid with a uint64_t so that the slot takes up (at least)
 * eight bytes even where pointers are narrower; the rest is then padding.
 */
typedef union simap_ptr64 {
	void *ptr;
	uint64_t u64;
} simap_ptr64;
|
|
|
|
/**
|
|
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
|
|
*
|
|
* This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup.
|
|
* Inserting NULLs to keys happens through tombstoning unless erase happens and we never
|
|
* shrink memory so please do not add a lot of things then remove a lot of things.
|
|
*
|
|
* We also only do heuristics against data being the key so its not "sure" and can fail...
|
|
*
|
|
* XXX: So beware that this can FAIL just "statistically most often works"!
|
|
*
|
|
* The memory layout after at *base is as follows:
|
|
*
|
|
* 8 byte:
|
|
* - void* value;
|
|
* - ? padding (only for non-64 bit pointer machines)
|
|
*
|
|
* 8 byte:
|
|
* - uint32_t previndex;
|
|
* - uint32_t nextindex;
|
|
*
|
|
* K x 8 byte:
|
|
* - char name[]; // inline stored
|
|
* - padding (divisible by 8)
|
|
*
|
|
* ELEMENTS added to it...
|
|
*
|
|
* Because of it a lookup is basically via strstr-like with 8byte steps!
|
|
* with few character names zero-padded in the search term parameter
|
|
* and if you want check extra validity by jumping back&forth in it.
|
|
*/
|
|
struct simap_instance {
|
|
arena a;
|
|
uint32_t end;
|
|
uint32_t prev_usage_end; /* previous usage_end or -1 if no previous exists... in bytes!!! */
|
|
uint32_t usage_end; /* in bytes!!! */
|
|
uint32_t next_previndex; /* in bytes!!! */
|
|
/** see doc comment for layout and why uint64_t* is the type */
|
|
uint64_t *base;
|
|
};
|
|
typedef struct simap_instance simap_instance;
|
|
|
|
static inline simap_instance simap_create() {
|
|
simap_instance ret;
|
|
ret.a = newarena((ptrdiff_t)1 << 33);
|
|
ret.end = 0;
|
|
ret.prev_usage_end = (uint32_t) -1;
|
|
ret.usage_end = 0;
|
|
ret.next_previndex = 0;
|
|
ret.base = ((uint64_t*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */
|
|
+ 1; /* First really addressible thing */
|
|
return ret;
|
|
}
|
|
|
|
/**
 * Eight characters overlaid with a uint64_t, so that the first (up to) eight
 * bytes of a key can be compared as a single 64-bit word.
 */
union simap_c64 {
	char str8[8];
	uint64_t u64;
};
typedef union simap_c64 simap_c64;
/* The typedef used to be spelled with this name; kept as an alias of the same union */
typedef union simap_c64 simap_char64;
|
|
|
|
/**
 * Looks up the value slot of a key by scanning the element storage word by word.
 *
 * The first (up to) 8 key bytes are zero-padded into one 64-bit "prefix" that
 * is compared against every 8-byte word of the used storage. A word that
 * matches is only a candidate (it may be value data or the middle of another
 * name), so the rest of the key is compared and then the candidate is
 * validated by following its previndex to the previous element and checking
 * that the previous element's nextindex leads back to the candidate.
 *
 * This stays a heuristic: arbitrary stored bytes can still look like a valid element.
 *
 * @param map The map to search in.
 * @param key The NUL-terminated key to look for.
 * @returns Pointer to the value slot of the element, or NULL if nothing was found.
 */
static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
	/* Bytes in front of every inline name: value slot + previndex + nextindex */
	const uint32_t header_size = (uint32_t)(sizeof(simap_ptr64) + sizeof(uint32_t) + sizeof(uint32_t));
	const uint32_t header_words = header_size / (uint32_t) sizeof(uint64_t);

	/* Construct prefix (fast-key): the first (up to) 8 key bytes, zero-padded */
	size_t keylen = strlen(key);
	int is_smallkey = (keylen < 8);
	size_t prefixlen = is_smallkey ? keylen : 8;
	union simap_c64 prefix;
	prefix.u64 = 0;
	memcpy(prefix.str8, key, prefixlen);

	/* Construct keyremains - points to the \0 terminator only if smallkey or 8 bytes exactly */
	const char *keyremains = key + prefixlen;

	/* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
	uint64_t *base = map->base;
	const uint32_t usage_end = map->usage_end;
	const uint32_t used_words = usage_end / (uint32_t) sizeof(uint64_t);

	/*
	 * A name is always preceded by a full header, so the first word that can
	 * hold one is header_words; starting there also keeps the header reads
	 * below from reaching in front of base.
	 */
	for(uint32_t i = header_words; i < used_words; ++i) {
		uint64_t *tipp = base + i;

		/* Fast lookup */
		if(*tipp != prefix.u64) {
			continue;
		}

		/* Check the remains of the string (only if needed) */
		if(!is_smallkey) {
			const char *tippremains = (const char *)(tipp + 1);
			if(strcmp(keyremains, tippremains) != 0) {
				continue;
			}
		}

		simap_ptr64 *ptr = (simap_ptr64 *)(tipp - header_words);

		/* Check back & forth (jump validation) */
		uint32_t previ = *((uint32_t *)(tipp - 1));
		if(previ == (uint32_t) -1) {
			/* Only the element at the very start of the storage has no previous one */
			if(i == header_words) {
				return ptr;
			}
			continue;
		}

		/* previ may be arbitrary bytes: only follow it if a header at that offset is aligned and in use */
		if((previ & 7) != 0 || previ >= usage_end || usage_end - previ < header_size) {
			continue;
		}
		uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ
				+ sizeof(simap_ptr64)
				+ sizeof(uint32_t));

		/* The previous element must link forward to exactly this name (compared as offsets, in 64 bit to avoid wrap-around) */
		if((uint64_t)prevnexi + header_size != (uint64_t)i * sizeof(uint64_t)) {
			continue;
		}

		/* Can have the (statistically checked) pointer */
		return ptr;
	}

	/* Haven't found anything */
	return NULL;
}
|
|
|
|
/**
 * Gets the number of padding bytes that must be appended to a size so that it
 * becomes divisible by the alignment.
 *
 * @param size The unpadded size in bytes.
 * @param alignment The alignment in bytes; must not be 0.
 * @returns A value in [0, alignment) such that (size + value) % alignment == 0.
 */
static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) {
	assert(alignment != 0);
	/* The outer modulo maps "already aligned" from alignment to 0 */
	return (alignment - (size % alignment)) % alignment;
}
|
|
|
|
/** Returns the size of the storage needed for the given key */
|
|
static inline uint32_t simap_elem_storage_size(const char *key) {
|
|
uint32_t keysize = strlen(key);
|
|
uint32_t padding = get_size_padding(keysize, 8);
|
|
|
|
return keysize +
|
|
sizeof(simap_ptr64) +
|
|
sizeof(uint32_t) +
|
|
sizeof(uint32_t) +
|
|
padding;
|
|
}
|
|
|
|
/** Force-add the (key,value) to the end of the map */
|
|
static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
|
|
uint32_t storage_needed = simap_elem_storage_size(key);
|
|
assert((storage_needed & 8) == 0);
|
|
if(map->end - map->usage_end > storage_needed) {
|
|
/* Need storage */
|
|
aralloc(&(map->a),
|
|
sizeof(uint8_t)/*esize*/,
|
|
1 /*align - should be 8 but should be aligned here as-is! */,
|
|
storage_needed);
|
|
|
|
/* Administer end offset */
|
|
map->end += storage_needed;
|
|
}
|
|
|
|
/* Already have the storage */
|
|
|
|
/* Create first 8 char encoding (this ensures endianness and all such stuff) */
|
|
simap_c64 first8 {0};
|
|
uint32_t keylen = strlen(key);
|
|
strncpy(first8.str8, key, (keylen < 8) ? keylen : 8);
|
|
|
|
uint32_t usi = map->usage_end;
|
|
uint32_t previ = map->prev_usage_end;
|
|
|
|
/* Save data ptr */
|
|
simap_ptr64 *data = (simap_ptr64 *)((uint8_t *)(map->base) + usi);
|
|
data->ptr = ptr;
|
|
|
|
/* Save link to previous */
|
|
uint32_t *usprev = (uint32_t *)((uint8_t *)(map->base) +
|
|
sizeof(simap_ptr64) +
|
|
sizeof(uint32_t));
|
|
*usprev = previ;
|
|
/* and nex */
|
|
*(usprev + 1) = (uint32_t) -1;
|
|
|
|
/* First 8 bytes */
|
|
simap_c64 *start_str = (simap_c64 *)(usprev + 2);
|
|
*start_str = first8;
|
|
|
|
/* Remainin bytes */
|
|
if(keylen > 8) {
|
|
/* uint32_t key_remains = keylen - 8; */
|
|
char *rem_str = (char *)(start_str + 1);
|
|
strcpy(rem_str, key + 8);
|
|
}
|
|
|
|
/* Update previous with linkage */
|
|
if(previ != (uint32_t)-1) {
|
|
uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ +
|
|
sizeof(simap_ptr64) +
|
|
sizeof(uint32_t));
|
|
*prevnex = usi;
|
|
}
|
|
|
|
/* Update prev usage end */
|
|
map->prev_usage_end = usi;
|
|
|
|
/* Administer usage_end offset */
|
|
map->usage_end += storage_needed;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* A simple map data structure that fulfills amap.h
|
|
*
|
|
* Operations:
|
|
*
|
|
* AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL.
|
|
* AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns "ptr" if there is no data for the key.
|
|
* AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
|
|
*
|
|
* @param amap_instance The instance we operate upon.
|
|
* @param op Defines which operation the caller wants.
|
|
* @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc.
|
|
* @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr and the "nt found" ptr to return in get...
|
|
* @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error.
|
|
*/
|
|
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
|
|
simap_instance *map = (simap_instance *) amap_instance;
|
|
|
|
if(op == AMAP_ERASE) {
|
|
map->usage_end = 0;
|
|
return NULL;
|
|
}
|
|
|
|
/* Search for the key - also needed for SET in order to "re-set" */
|
|
simap_ptr64 *found = simap_search_internal(map, key);
|
|
|
|
if(op == AMAP_GET) {
|
|
return found ? found->ptr : ptr;
|
|
} else {
|
|
assert(op == AMAP_SET);
|
|
|
|
if(found) {
|
|
/* Just overwrite */
|
|
found->ptr = ptr;
|
|
return (void *) found;
|
|
} else {
|
|
return simap_force_add_internal(map, key, ptr);
|
|
}
|
|
}
|
|
|
|
assert(false); /* should be unreachable */
|
|
return ptr;
|
|
}
|
|
|
|
#endif /* SIMAP_H */
|