#ifndef SIMAP_H #define SIMAP_H #include #include #include #include #include "amap.h" #include "arena.h/arena.h" /** This is to ensure 8byte storage of pointers (with possible padding) */ union simap_ptr64 { void *ptr; uint64_t u64; }; typedef union simap_ptr64 simap_ptr64; /** * A "peasantly" map data structure backed by arena.h - basically a toy data structure... * * This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup. * Inserting NULLs to keys happens through tombstoning unless erase happens and we never * shrink memory so please do not add a lot of things then remove a lot of things. * * We also only do heuristics against data being the key so its not "sure" and can fail... * * XXX: So beware that this can FAIL just "statistically most often works"! * * The memory layout after at *base is as follows: * * 8 byte: * - void* value; * - ? padding (only for non-64 bit pointer machines) * * 8 byte: * - uint32_t previndex; * - uint32_t nextindex; * * K x 8 byte: * - char name[]; // inline stored * - padding (divisible by 8) * * ELEMENTS added to it... * * Because of it a lookup is basically via strstr-like with 8byte steps! * with few character names zero-padded in the search term parameter * and if you want check extra validity by jumping back&forth in it. */ struct simap_instance { arena a; uint32_t end; uint32_t prev_usage_end; /* previous usage_end or -1 if no previous exists... in bytes!!! */ uint32_t usage_end; /* in bytes!!! */ uint32_t next_previndex; /* in bytes!!! */ /** see doc comment for layout and why uint64_t* is the type */ uint64_t *base; }; typedef struct simap_instance simap_instance; static inline simap_instance simap_create() { simap_instance ret; ret.a = newarena((ptrdiff_t)1 << 33); ret.end = 0; ret.prev_usage_end = (uint32_t) -1; ret.usage_end = 0; ret.next_previndex = 0; ret.base = ((uint64_t*) aralloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */ + 1; /* First really addressible thing */ return ret; } union simap_c64 { char str8[8]; uint64_t u64; }; typedef union simap_char64 simap_char64; static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) { /* Construct prefix (fast-key) */ size_t keylen = strlen(key); char is_smallkey = (keylen < 8); simap_c64 prefix {0}; size_t prefixlen = is_smallkey ? keylen : 8; strncpy(prefix.str8, key, prefixlen); /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */ const char *keyremains = key + prefixlen; /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */ /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */ uint64_t *base = map->base; uint64_t *tipp = map->base; for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) { /* Fast lookup */ if(*tipp == prefix.u64) { /* First check the remains of the string (only if needed) */ if(!is_smallkey) { char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); if(strcmp(keyremains, tippremains) != 0) { continue; } } simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2)); /* Check back & forth (jump validation) */ uint32_t previ = *((uint32_t *)(tipp - 1)); if(previ == (uint32_t) -1) { /* Expect it be good if it was first insert ever? Statistically rare to be not like it */ return ptr; } uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ + sizeof(simap_ptr64) + sizeof(uint32_t)); uint64_t *retipp = (uint64_t *)(((uint8_t *)base + prevnexi) + sizeof(simap_ptr64) + sizeof(uint32_t) + + sizeof(uint32_t)); if(retipp != tipp) { continue; } /* Can have the (statistically checked) pointer */ return ptr; } } /* Haven't found anything */ return NULL; } /** Gets padding bytes for a size to be padded to divisible alignment */ static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) { // return (alignment - (size % alignment)) % alignment; return (size + alignment - 1) / alignment * alignment; } /** Returns the size of the storage needed for the given key */ static inline uint32_t simap_elem_storage_size(const char *key) { uint32_t keysize = strlen(key); uint32_t padding = get_size_padding(keysize, 8); return keysize + sizeof(simap_ptr64) + sizeof(uint32_t) + sizeof(uint32_t) + padding; } /** Force-add the (key,value) to the end of the map */ static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) { uint32_t storage_needed = simap_elem_storage_size(key); assert((storage_needed & 8) == 0); if(map->end - map->usage_end > storage_needed) { /* Need storage */ aralloc(&(map->a), sizeof(uint8_t)/*esize*/, 1 /*align - should be 8 but should be aligned here as-is! */, storage_needed); /* Administer end offset */ map->end += storage_needed; } /* Already have the storage */ /* Create first 8 char encoding (this ensures endianness and all such stuff) */ simap_c64 first8 {0}; uint32_t keylen = strlen(key); strncpy(first8.str8, key, (keylen < 8) ? keylen : 8); uint32_t usi = map->usage_end; uint32_t previ = map->prev_usage_end; /* Save data ptr */ simap_ptr64 *data = (simap_ptr64 *)((uint8_t *)(map->base) + usi); data->ptr = ptr; /* Save link to previous */ uint32_t *usprev = (uint32_t *)((uint8_t *)(map->base) + sizeof(simap_ptr64) + sizeof(uint32_t)); *usprev = previ; /* and nex */ *(usprev + 1) = (uint32_t) -1; /* First 8 bytes */ simap_c64 *start_str = (simap_c64 *)(usprev + 2); *start_str = first8; /* Remainin bytes */ if(keylen > 8) { /* uint32_t key_remains = keylen - 8; */ char *rem_str = (char *)(start_str + 1); strcpy(rem_str, key + 8); } /* Update previous with linkage */ if(previ != (uint32_t)-1) { uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ + sizeof(simap_ptr64) + sizeof(uint32_t)); *prevnex = usi; } /* Update prev usage end */ map->prev_usage_end = usi; /* Administer usage_end offset */ map->usage_end += storage_needed; return data; } /** * A simple map data structure that fulfills amap.h * * Operations: * * AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL. * AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns "ptr" if there is no data for the key. * AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL. * * @param amap_instance The instance we operate upon. * @param op Defines which operation the caller wants. * @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc. * @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr and the "nt found" ptr to return in get... * @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error. */ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) { simap_instance *map = (simap_instance *) amap_instance; if(op == AMAP_ERASE) { map->usage_end = 0; return NULL; } /* Search for the key - also needed for SET in order to "re-set" */ simap_ptr64 *found = simap_search_internal(map, key); if(op == AMAP_GET) { return found ? found->ptr : ptr; } else { assert(op == AMAP_SET); if(found) { /* Just overwrite */ found->ptr = ptr; return (void *) found; } else { return simap_force_add_internal(map, key, ptr); } } assert(false); /* should be unreachable */ return ptr; } #endif /* SIMAP_H */