#ifndef SIMAP_H #define SIMAP_H #include #include #include #include #include "amap.h" #include "arena.h/arena.h" /** This is to ensure 8byte storage of pointers (with possible padding) */ union simap_ptr64 { void *ptr; uint64_t u64; }; typedef union simap_ptr64 simap_ptr64; /** * A "peasantly" map data structure backed by arena.h - basically a toy data structure... * * This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup. * Inserting NULLs to keys happens through tombstoning unless erase happens and we never * shrink memory so please do not add a lot of things then remove a lot of things. * * We also only do heuristics against data being the key so its not "sure" and can fail... * * XXX: So beware that this can FAIL just "statistically most often works"! * * The memory layout after at *base is as follows: * * 8 byte: * - void* value; * - ? padding (only for non-64 bit pointer machines) * * 8 byte: * - uint32_t previndex; * - uint32_t nextindex; * * K x 8 byte: * - char name[]; // inline stored * - padding (divisible by 8) * * ELEMENTS added to it... * * Because of it a lookup is basically via strstr-like with 8byte steps! * with few character names zero-padded in the search term parameter * and if you want check extra validity by jumping back&forth in it. */ struct simap_instance { arena a; uint32_t end; uint32_t usage_end; /* in bytes!!! */ uint32_t next_previndex; /* in bytes!!! */ /** see doc comment for layout and why uint64_t* is the type */ uint64_t *base; }; typedef struct simap_instance simap_instance; static inline simap_instance simap_create() { simap_instance ret; ret.a = newarena((ptrdiff_t)1 << 33); ret.end = 0; ret.usage_end = 0; ret.next_previndex = 0; ret.base = ((uint64_t*) alloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */ + 1; /* First really addressible thing */ return ret; } union simap_c64 { char str8[8]; uint64_t u64; }; typedef union simap_char64 simap_char64; static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) { /* Construct prefix (fast-key) */ size_t keylen = strlen(key); char is_smallkey = (keylen < 8); simap_c64 prefix {0}; size_t prefixlen = is_smallkey ? keylen : 8; strncpy(prefix.str8, key, prefixlen); /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */ const char *keyremains = key + prefixlen; /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */ /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */ uint64_t *base = map->base; uint64_t *tipp = map->base; for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) { /* Fast lookup */ if(*tipp == prefix.u64) { /* First check the remains of the string (only if needed) */ if(!is_smallkey) { char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); if(strcmp(keyremains, tippremains) != 0) { continue; } } /* Check back & forth (jump validation) */ uint32_t previ = *((uint32_t *)(tipp - 1)); uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ + sizeof(simap_ptr64) + sizeof(uint32_t)); simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *)base + prevnexi); uint64_t *retipp = (uint64_t *)(((uint8_t *)base + prevnexi) + sizeof(simap_ptr64) + sizeof(uint32_t) + + sizeof(uint32_t)); if(retipp != tipp) { continue; } /* Can have the (statistically checked) pointer */ return ptr; } } /* Haven't found anything */ return NULL; } /** Gets padding bytes for a size to be padded to divisible alignment */ static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) { // return (alignment - (size % alignment)) % alignment; return (size + alignment - 1) / alignment * alignment; } /** Returns the size of the storage needed for the given key */ static inline uint32_t simap_elem_storage_size(const char *key) { uint32_t keysize = strlen(key); uint32_t padding = get_size_padding(keysize, 8); return keysize + sizeof(simap_ptr64) + sizeof(uint32_t) + sizeof(uint32_t) + padding; } /** Force-add the (key,value) to the end of the map */ static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) { uint32_t storage_needed = simap_elem_storage_size(key); if(map->end - map->usage_end > storage_needed) { /* Need storage */ /* TODO: Implement */ // Soemthin' like = ((T*) alloc(&a, sizeof(T), sizeof(T), 1)) + 1; } /* Already have the storage */ /* TODO: Implement - beware that I NEED to store the first 8 characters as a simap_c64! */ return NULL; } /** * A simple map data structure that fulfills amap.h * * Operations: * * AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL. * AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key. * AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL. * * @param amap_instance The instance we operate upon. * @param op Defines which operation the caller wants. * @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc. * @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr. * @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error. */ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) { simap_instance *map = (simap_instance *) amap_instance; if(op == AMAP_ERASE) { map->usage_end = 0; return NULL; } /* Search for the key - also needed for SET in order to "re-set" */ simap_ptr64 *found = simap_search_internal(map, key); if(op == AMAP_GET) { return found->ptr; } else { assert(op == AMAP_SET); if(found) { /* Just overwrite */ found->ptr = ptr; return (void *) found; } else { } } return NULL; } #endif /* SIMAP_H */