/* simap.h - a simple linear-lookup string -> pointer map backed by arena.h (fulfills amap.h) */
#ifndef SIMAP_H
|
||
|
#define SIMAP_H
|
||
|
#include <stddef.h>
|
||
|
#include <stdint.h>
|
||
|
#include <string.h>
|
||
|
#include <assert.h>
|
||
|
#include "amap.h"
|
||
|
#include "arena.h/arena.h"
|
||
|
|
||
|
/**
 * Value slot of a map element: a pointer stored in exactly 8 bytes.
 *
 * On machines with pointers narrower than 64 bits the u64 member pads the slot,
 * so every element of the map starts with an 8 byte value field.
 */
union simap_ptr64 {
    void *ptr;
    uint64_t u64;
};
typedef union simap_ptr64 simap_ptr64;

/* Compile-time check of the 8 byte promise above (array size becomes -1 if it is broken) */
typedef char simap_ptr64_must_be_8_bytes[(sizeof(simap_ptr64) == 8) ? 1 : -1];
|
||
|
|
||
|
/**
|
||
|
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
|
||
|
*
|
||
|
* This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup.
|
||
|
* Inserting NULLs to keys happens through tombstoning unless erase happens and we never
|
||
|
* shrink memory so please do not add a lot of things then remove a lot of things.
|
||
|
*
|
||
|
* We also only do heuristics against data being the key so its not "sure" and can fail...
|
||
|
*
|
||
|
* XXX: So beware that this can FAIL just "statistically most often works"!
|
||
|
*
|
||
|
* The memory layout after at *base is as follows:
|
||
|
*
|
||
|
* 8 byte:
|
||
|
* - void* value;
|
||
|
* - ? padding (only for non-64 bit pointer machines)
|
||
|
*
|
||
|
* 8 byte:
|
||
|
* - uint32_t previndex;
|
||
|
* - uint32_t nextindex;
|
||
|
*
|
||
|
* K x 8 byte:
|
||
|
* - char name[]; // inline stored
|
||
|
* - padding (divisible by 8)
|
||
|
*
|
||
|
* ELEMENTS added to it...
|
||
|
*
|
||
|
* Because of it a lookup is basically via strstr-like with 8byte steps!
|
||
|
* with few character names zero-padded in the search term parameter
|
||
|
* and if you want check extra validity by jumping back&forth in it.
|
||
|
*/
|
||
|
struct simap_instance {
|
||
|
arena a;
|
||
|
uint32_t end;
|
||
|
uint32_t usage_end; /* in bytes!!! */
|
||
|
uint32_t next_previndex; /* in bytes!!! */
|
||
|
/** see doc comment for layout and why uint64_t* is the type */
|
||
|
uint64_t *base;
|
||
|
};
|
||
|
typedef struct simap_instance simap_instance;
|
||
|
|
||
|
static inline simap_instance simap_create() {
|
||
|
simap_instance ret;
|
||
|
ret.a = newarena((ptrdiff_t)1 << 33);
|
||
|
ret.end = 0;
|
||
|
ret.usage_end = 0;
|
||
|
ret.next_previndex = 0;
|
||
|
ret.base = ((uint64_t*) alloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */
|
||
|
+ 1; /* First really addressible thing */
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/**
 * Eight characters viewed as one 64 bit word.
 *
 * Used to build the zero-padded "fast-key" prefix of a key so that it can be
 * compared against stored names with a single uint64_t comparison.
 */
union simap_c64 {
    char str8[8];
    uint64_t u64;
};
/* The typedef has to name the simap_c64 tag: that is the type the rest of the file uses */
typedef union simap_c64 simap_c64;
|
||
|
|
||
|
/**
 * Looks up the value slot that belongs to the given key.
 *
 * The first (at most) 8 characters of the key are packed zero-padded into one 64 bit word
 * and the used part of the storage is scanned word-by-word for it. A candidate is accepted
 * only if the rest of the key matches too (for keys of 8 or more characters) and if the
 * prev -> next index jump leads back to the very same element (jump validation).
 *
 * This is a heuristic: stored data that looks like a key is told apart only statistically.
 *
 * @param map The map to search in.
 * @param key The zero terminated key to look for.
 * @returns Pointer to the value slot of the element, or NULL if nothing was found.
 */
static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
    /* Size of the value + previndex + nextindex header that precedes every stored name */
    const size_t header_size = sizeof(simap_ptr64) + sizeof(uint32_t) + sizeof(uint32_t);

    /* Construct prefix (fast-key) */
    size_t keylen = strlen(key);
    char is_smallkey = (keylen < 8);

    union simap_c64 prefix = {0};
    size_t prefixlen = is_smallkey ? keylen : 8;
    memcpy(prefix.str8, key, prefixlen); /* the rest stays zero padded */

    /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */
    const char *keyremains = key + prefixlen;

    /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */
    /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
    uint8_t *base_bytes = (uint8_t *)map->base;
    uint64_t *tipp = map->base;
    const uint32_t used_words = map->usage_end / 8;
    for(uint32_t i = 0; i < used_words; ++i, ++tipp) {
        /* Fast lookup */
        if(*tipp != prefix.u64) {
            continue;
        }

        /* First check the remains of the string (only if needed) */
        if(!is_smallkey) {
            const char *tippremains = (const char *)(tipp + 1);
            if(strcmp(keyremains, tippremains) != 0) {
                continue;
            }
        }

        /* Check back & forth (jump validation) */

        /* previndex is the first half of the word right before the name */
        uint32_t previ;
        memcpy(&previ, tipp - 1, sizeof(previ));

        /* The candidate might be plain data: never follow an index out of the used storage */
        if((uint64_t)previ + header_size > map->usage_end) {
            continue;
        }

        /* nextindex of the previous element: it has to lead back to this very element */
        uint32_t prevnexi;
        memcpy(&prevnexi,
               base_bytes + previ + sizeof(simap_ptr64) + sizeof(uint32_t),
               sizeof(prevnexi));

        /* The name of that element has to start exactly at the word we are standing on */
        if((uint64_t)prevnexi + header_size != (uint64_t)i * sizeof(uint64_t)) {
            continue;
        }

        /* Can have the (statistically checked) pointer */
        return (simap_ptr64 *)(base_bytes + prevnexi);
    }

    /* Haven't found anything */
    return NULL;
}
|
||
|
|
||
|
/**
 * Gets the number of padding bytes needed to make size divisible by alignment.
 *
 * This is the padding only (0 .. alignment-1), NOT the padded size: callers add it to the size.
 *
 * @param size The unpadded size in bytes.
 * @param alignment The divisor the padded size should be a multiple of. 0 means no padding.
 * @returns The number of bytes to add to size (0 if size is already divisible).
 */
static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) {
    if(alignment == 0) {
        /* Nothing to align to - also avoids dividing by zero */
        return 0;
    }
    return (alignment - (size % alignment)) % alignment;
}
|
||
|
|
||
|
/** Returns the size of the storage needed for the given key */
|
||
|
static inline uint32_t simap_elem_storage_size(const char *key) {
|
||
|
uint32_t keysize = strlen(key);
|
||
|
uint32_t padding = get_size_padding(keysize, 8);
|
||
|
|
||
|
return keysize +
|
||
|
sizeof(simap_ptr64) +
|
||
|
sizeof(uint32_t) +
|
||
|
sizeof(uint32_t) +
|
||
|
padding;
|
||
|
}
|
||
|
|
||
|
/** Force-add the (key,value) to the end of the map */
|
||
|
static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
|
||
|
uint32_t storage_needed = simap_elem_storage_size(key);
|
||
|
if(map->end - map->usage_end > storage_needed) {
|
||
|
/* Need storage */
|
||
|
/* TODO: Implement */
|
||
|
// Soemthin' like = ((T*) alloc(&a, sizeof(T), sizeof(T), 1)) + 1;
|
||
|
}
|
||
|
|
||
|
/* Already have the storage */
|
||
|
/* TODO: Implement - beware that I NEED to store the first 8 characters as a simap_c64! */
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* A simple map data structure that fulfills amap.h
|
||
|
*
|
||
|
* Operations:
|
||
|
*
|
||
|
* AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL.
|
||
|
* AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
|
||
|
* AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
|
||
|
*
|
||
|
* @param amap_instance The instance we operate upon.
|
||
|
* @param op Defines which operation the caller wants.
|
||
|
* @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc.
|
||
|
* @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr.
|
||
|
* @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error.
|
||
|
*/
|
||
|
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
|
||
|
simap_instance *map = (simap_instance *) amap_instance;
|
||
|
|
||
|
if(op == AMAP_ERASE) {
|
||
|
map->usage_end = 0;
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/* Search for the key - also needed for SET in order to "re-set" */
|
||
|
simap_ptr64 *found = simap_search_internal(map, key);
|
||
|
|
||
|
if(op == AMAP_GET) {
|
||
|
return found->ptr;
|
||
|
} else {
|
||
|
assert(op == AMAP_SET);
|
||
|
|
||
|
if(found) {
|
||
|
/* Just overwrite */
|
||
|
found->ptr = ptr;
|
||
|
return (void *) found;
|
||
|
} else {
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
#endif /* SIMAP_H */
|