diff --git a/amap.h b/amap.h
new file mode 100644
index 0000000..80bb011
--- /dev/null
+++ b/amap.h
@@ -0,0 +1,25 @@
+#ifndef AMAP_H
+#define AMAP_H
+
+/** Operations possible on the abstract map (setting a key to NULL must make a later get return NULL; implementations may tombstone it). */
+enum AMAP_OP { AMAP_SET = 0, AMAP_GET = 1, AMAP_ERASE = 2 };
+typedef enum AMAP_OP AMAP_OP;
+
+/**
+ * Function-abstraction for an abstract map data type.
+ *
+ * Operations:
+ *
+ * AMAP_SET   Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL. Returns NULL on error!
+ * AMAP_GET   Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
+ * AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
+ *
+ * @param amap_instance The instance we operate upon.
+ * @param op Defines which operation the caller wants.
+ * @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc.
+ * @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr.
+ * @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error.
+ */
+typedef void* (*amap)(void *amap_instance, AMAP_OP op, const char *key, void *ptr);
+
+#endif /* AMAP_H */
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..aef2ee9
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,8 @@
+#include <cstdio> /* NOTE(review): the header name was missing from this patch; <cstdio> is a placeholder guess - confirm */
+#include "amap.h"
+#include "simap.h"
+
+
+int main() {
+    return 0;
+}
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..5f2414c
--- /dev/null
+++ b/makefile
@@ -0,0 +1,4 @@
+debug:
+	g++ main.cpp -g -Wall -o main
+release:
+	g++ main.cpp -O2 -Wall -o main
diff --git a/simap.h b/simap.h
new file mode 100644
index 0000000..e90ea9e
--- /dev/null
+++ b/simap.h
@@ -0,0 +1,245 @@
+#ifndef SIMAP_H
+#define SIMAP_H
+/* NOTE(review): the four standard header names were missing from this patch; they are inferred from the identifiers used below. */
+#include <assert.h> /* assert */
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include <stdint.h> /* uint8_t, uint32_t, uint64_t */
+#include <string.h> /* strlen, strcmp, memcpy */
+#include "amap.h"
+#include "arena.h/arena.h"
+
+/** This is to ensure 8byte storage of pointers (with possible padding) */
+union simap_ptr64 {
+    void *ptr;
+    uint64_t u64;
+};
+typedef union simap_ptr64 simap_ptr64;
+
+/**
+ * A "peasantly" map data structure backed by arena.h - basically a toy data structure...
+ *
+ * This is very simple: no trees, no hashes, just a (hopefully) autovectorized linear lookup.
+ * Setting a key to NULL tombstones it (until the whole map is erased) and memory never
+ * shrinks, so please do not add a lot of things and then remove a lot of things.
+ *
+ * Stored data is only heuristically told apart from stored keys, so a lookup is not "sure"...
+ *
+ * XXX: So beware that this can FAIL - it just "statistically most often works"!
+ *
+ * The memory layout of each element, starting at *base, is as follows:
+ *
+ * 8 byte:
+ * - void* value;
+ * - ? padding (only for non-64 bit pointer machines)
+ *
+ * 8 byte:
+ * - uint32_t previndex;
+ * - uint32_t nextindex;
+ *
+ * K x 8 byte:
+ * - char name[]; // inline stored
+ * - padding (so that the name area is divisible by 8)
+ *
+ * Further ELEMENTS are appended after it...
+ *
+ * Because of this a lookup is basically strstr-like with 8 byte steps:
+ * names shorter than 8 characters are zero-padded in the search term,
+ * and a hit is validated further by jumping back & forth via the indices.
+ */
+struct simap_instance {
+    arena a;
+    uint32_t end;
+    uint32_t usage_end; /* in bytes!!! */
+    uint32_t next_previndex; /* in bytes!!! */
+    /** see doc comment for layout and why uint64_t* is the type */
+    uint64_t *base;
+};
+typedef struct simap_instance simap_instance;
+
+/**
+ * Creates an empty map backed by a freshly created arena.
+ *
+ * @returns The new instance with nothing stored in it; base points one uint64_t past the word allocated here.
+ */
+static inline simap_instance simap_create(void) {
+    simap_instance ret;
+    ret.a = newarena((ptrdiff_t)1 << 33);
+    ret.end = 0;
+    ret.usage_end = 0;
+    ret.next_previndex = 0;
+    ret.base = ((uint64_t*) alloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */
+            + 1; /* First really addressable thing */
+    return ret;
+}
+
+/** Views the first 8 characters of a key as one 64 bit word for the fast prefix compare */
+union simap_c64 {
+    char str8[8];
+    uint64_t u64;
+};
+typedef union simap_c64 simap_c64;
+typedef union simap_c64 simap_char64; /* the name this typedef used to declare; kept so existing references still compile */
+
+/**
+ * Looks up the value slot of a key by a linear scan over the 8 byte words of the used area.
+ *
+ * A word equal to the zero-padded first 8 characters of the key is a candidate; for keys
+ * of 8 or more characters the remaining characters are compared with strcmp, and finally
+ * the candidate is validated by following its previndex and that element's nextindex.
+ *
+ * @param map The map to search in.
+ * @param key The zero terminated key to look for.
+ * @returns Pointer to the value slot of the element, or NULL when no candidate validates.
+ */
+static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
+    /* Construct prefix (fast-key): the first 8 characters, zero padded for shorter keys */
+    size_t keylen = strlen(key);
+    char is_smallkey = (keylen < 8);
+
+    simap_c64 prefix = {{0}};
+    size_t prefixlen = is_smallkey ? keylen : 8;
+    memcpy(prefix.str8, key, prefixlen);
+
+    /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */
+    const char *keyremains = key + prefixlen;
+
+    /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */
+    /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
+    uint64_t *base = map->base;
+    uint64_t *tipp = map->base;
+    for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) {
+        /* Fast lookup */
+        if(*tipp == prefix.u64) {
+            /* First check the remains of the string (only if needed) */
+            if(!is_smallkey) {
+                char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
+                if(strcmp(keyremains, tippremains) != 0) {
+                    continue;
+                }
+            }
+
+            /* Check back & forth (jump validation) */
+            uint32_t previ = *((uint32_t *)(tipp - 1));
+            /* A word that merely looks like the key can carry any previndex: never follow one that leaves the used area */
+            if((uint64_t) previ + sizeof(simap_ptr64) + 2 * sizeof(uint32_t) > map->usage_end) {
+                continue;
+            }
+            uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ +
+                    sizeof(simap_ptr64) +
+                    sizeof(uint32_t));
+
+            simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *)base + prevnexi);
+            uint64_t *retipp = (uint64_t *)(((uint8_t *)base + prevnexi)
+                    + sizeof(simap_ptr64) + sizeof(uint32_t) + sizeof(uint32_t));
+
+            if(retipp != tipp) {
+                continue;
+            }
+
+            /* Can have the (statistically checked) pointer */
+            return ptr;
+        }
+    }
+
+    /* Haven't found anything */
+    return NULL;
+}
+
+/**
+ * Gets the number of padding bytes needed to grow a size to a multiple of the alignment.
+ *
+ * @param size The size in bytes that is to be padded.
+ * @param alignment The alignment in bytes; must not be zero.
+ * @returns The bytes to add to size (0 when size already is a multiple of alignment).
+ */
+static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) {
+    /* This has to be the padding itself and not the rounded-up size: the caller adds it on top of the key size */
+    return (alignment - (size % alignment)) % alignment;
+}
+
+/**
+ * Returns the size of the storage needed for the given key
+ *
+ * That is the 8 byte value slot, the two uint32_t indices and the key padded to 8 bytes.
+ * NOTE(review): a key whose length is a multiple of 8 gets no padding, so no \0 is
+ * accounted for after it, while the lookup strcmp()s the stored name - confirm whether
+ * the terminator has to be stored as well once adding is implemented.
+ */
+static inline uint32_t simap_elem_storage_size(const char *key) {
+    uint32_t keysize = (uint32_t) strlen(key);
+    uint32_t padding = get_size_padding(keysize, 8);
+
+    return (uint32_t) (keysize
+        + sizeof(simap_ptr64)
+        + sizeof(uint32_t)
+        + sizeof(uint32_t)
+        + padding);
+}
+
+/**
+ * Force-add the (key,value) to the end of the map
+ *
+ * Not implemented yet: nothing is stored and NULL is returned for every call.
+ *
+ * @param map The map to append to.
+ * @param key The key of the new element.
+ * @param ptr The value of the new element.
+ * @returns Currently always NULL.
+ */
+static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
+    uint32_t storage_needed = simap_elem_storage_size(key);
+    if(map->end - map->usage_end < storage_needed) {
+        /* Need storage */
+        /* TODO: Implement */
+        // Something like = ((T*) alloc(&a, sizeof(T), sizeof(T), 1)) + 1;
+    }
+
+    /* Already have the storage */
+    /* TODO: Implement - beware that I NEED to store the first 8 characters as a simap_c64! */
+    (void) ptr; /* unused until the TODO above is done */
+    return NULL;
+}
+
+/**
+ * A simple map data structure that fulfills amap.h
+ *
+ * Operations:
+ *
+ * AMAP_SET   Overwrites the ptr of an already stored key (ptr==NULL "tombstones" it) and returns the address of its value slot. Storing a new key is not implemented yet and returns NULL.
+ * AMAP_GET   Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
+ * AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
+ *
+ * @param amap_instance The instance we operate upon; it is used as a simap_instance.
+ * @param op Defines which operation the caller wants.
+ * @param key The key (both for SET and GET); it is not looked at for ERASE.
+ * @param ptr The data to store for the key on SET.
+ * @returns The ptr stored for the key on GET, the address of the value slot on a successful SET, NULL otherwise.
+ */
+static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
+    simap_instance *map = (simap_instance *) amap_instance;
+
+    if(op == AMAP_ERASE) {
+        map->usage_end = 0;
+        return NULL;
+    }
+
+    /* Search for the key - also needed for SET in order to "re-set" */
+    simap_ptr64 *found = simap_search_internal(map, key);
+
+    if(op == AMAP_GET) {
+        /* A key that was never stored has no slot: that is a NULL result, not a crash */
+        return found ? found->ptr : NULL;
+    }
+
+    assert(op == AMAP_SET);
+    if(found) {
+        /* Just overwrite */
+        found->ptr = ptr;
+        return (void *) found;
+    }
+
+    /* TODO: adding a key that is not stored yet is not implemented, so this SET reports an error */
+    return NULL;
+}
+
+#endif /* SIMAP_H */