#ifndef SIMAP_H #define SIMAP_H #include #include #include #include #include "amap.h" #include "arena.h/arena.h" /* Perf trickery */ /* I have no idea what MSVC has instead... */ #ifdef _MSC_VER #define SM_THREAD_LOCAL __declspec(thread) #define SM_PREFETCH(x) #define SM_LIKELY(x) #define SM_UNLIKELY(x) #define SM_NOINLINE __declspec(noinline) #define SM_ALWAYS_INLINE __forceinline #else #define SM_THREAD_LOCAL __thread #define SM_PREFETCH(x) __builtin_prefetch(x) #define SM_LIKELY(x) __builtin_expect((x),1) #define SM_UNLIKELY(x) __builtin_expect((x),0) #define SM_NOINLINE __attribute__ ((noinline)) #define SM_ALWAYS_INLINE __attribute__ ((always_inline)) #endif typedef uint64_t auint64 __attribute__ ((__aligned__(8))); /** The first 8 characters are stored as uint64_t for fast checks */ union simap_c64 { char str8[8]; auint64 u64; }; typedef union simap_char64 simap_char64; /** This is to ensure 8byte storage of pointers (with possible padding) */ union simap_ptr64 { void *ptr; auint64 u64; }; typedef union simap_ptr64 simap_ptr64; /** * The per-element storage layout * * 8 byte: * - void* value; * - [?] optional padding (only for non-64 bit pointer machines) * * 8 byte: * - uint32_t previndex; * - uint32_t nextindex; * * K x 8 byte: * - char name[]; // inline stored * - padding (divisible by 8) * * Because of it a lookup is basically via strstr-like with 8byte steps! * with few character names zero-padded in the search term parameter * and if you want check extra validity by jumping back&forth in it. */ struct elem_prefix { /** The value (ptr) */ simap_ptr64 value; /** Previous element index from base (full offset) */ uint32_t previndex; /** Next element index from base (full offset) */ uint32_t nextindex; /** The prefix of the key */ simap_c64 key_prefix; }; typedef struct elem_prefix elem_prefix; /** * A "peasantly" map data structure backed by arena.h - basically a toy data structure... * * This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup. * Inserting NULLs to keys happens through tombstoning unless erase happens and we never * shrink memory so please do not add a lot of things then remove a lot of things. * * In AVX2 mode, we do heuristics against data being the key so its not "sure" and can fail... * XXX: So beware that this CAN FAIL for AVX2 build flags just "statistically most often works"! */ struct simap_instance { arena a; uint32_t end; uint32_t prev_usage_end; /* previous usage_end or -1 if no previous exists... in bytes!!! */ uint32_t usage_end; /* in bytes!!! */ elem_prefix *base; }; typedef struct simap_instance simap_instance; static inline simap_instance simap_create() { simap_instance ret; ret.a = newarena((ptrdiff_t)1 << 33); ret.end = 0; ret.prev_usage_end = (uint32_t) -1; ret.usage_end = 0; ret.base = (elem_prefix*)(((auint64*) aralloc(&(ret.a), sizeof(auint64), sizeof(auint64), 1)) /* addr divisible by 8 */ + 1); /* First really addressible thing */ return ret; } static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr); // TODO: We can possibly hand-optimise this with intrinsics maybe - but I hope autovectorization (does not seem to happen???) static inline SM_ALWAYS_INLINE auint64 *make_tipp(auint64 *tip, auint64 prefix, auint64 *end) { #pragma GCC unroll 4 while((tip < end) && (*tip != prefix)) ++tip; return tip; } static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) { /* Construct prefix (fast-key) */ size_t keylen = strlen(key); char is_smallkey = (keylen < 8); simap_c64 prefix {0}; size_t prefixlen = is_smallkey ? keylen : 8; strncpy(prefix.str8, key, prefixlen); /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */ const char *keyremains = key + prefixlen; /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */ auint64 *base = (auint64 *) (map->base); auint64 *end = (auint64 *)((uint8_t *)base + (map->usage_end)); auint64 *tipp = make_tipp(base, prefix.u64, end); while(tipp < end) { /* Need detailed lookup, because found the prefix */ assert((*tipp == prefix.u64)); /* First check the remains of the string (only if needed) */ if(!is_smallkey) { char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t)); if(strcmp(keyremains, tippremains) != 0) { tipp = make_tipp(tipp + 1, prefix.u64, end); continue; } } simap_ptr64 *ptr = (simap_ptr64 *)((uint8_t *) (tipp - 2)); /* Check back & forth (jump validation) */ uint32_t previ = *((uint32_t *)(tipp - 1)); if(previ == (uint32_t) -1) { /* Expect it be good if it was first insert ever? Statistically rare to be not like it */ return ptr; } uint32_t prevnexi = *(uint32_t *)(((uint8_t *)base) + previ + sizeof(simap_ptr64) + sizeof(uint32_t)); auint64 *retipp = (auint64 *)(((uint8_t *)base + prevnexi) + sizeof(simap_ptr64) + sizeof(uint32_t) + + sizeof(uint32_t)); if(retipp != tipp) { tipp = make_tipp(tipp + 1, prefix.u64, end); continue; } /* Can have the (statistically checked) pointer */ return ptr; } /* Haven't found anything */ return NULL; } /** Gets padding bytes for a size to be padded to divisible alignment */ static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) { /* Would ensure returned value divisible by alignment */ /* return (size + alignment - 1) / alignment * alignment; */ /* same: return (alignment - (size % alignment)) % alignment; */ /* Substracting size leads to padding */ return ((size + alignment - 1) / alignment) * alignment - size; } /** Returns the size of the storage needed for the given key */ static inline uint32_t simap_elem_storage_size(const char *key) { uint32_t keysize = strlen(key); uint32_t padding = get_size_padding(keysize, 8); /* XXX: The exactly 8byte keys need a zero terminator too (would be overridden without this) */ padding += (keysize == 8) ? 8 : 0; return keysize + sizeof(simap_ptr64) + sizeof(uint32_t) + sizeof(uint32_t) + padding; } /** Force-add the (key,value) to the end of the map. Use this if you prefill your map one-by-one and need speed */ static inline void *simap_force_add(simap_instance *map, const char *key, void *ptr) { uint32_t storage_needed = simap_elem_storage_size(key); assert((storage_needed % 8) == 0); if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) { /* Need storage */ aralloc(&(map->a), sizeof(uint8_t)/*esize*/, 1 /*align - should be 8 but should be aligned here as-is! */, storage_needed); /* Administer end offset */ map->end += storage_needed; } /* Already have the storage */ /* Create first 8 char encoding (this ensures endianness and all such stuff) */ simap_c64 first8 {0}; uint32_t keylen = strlen(key); strncpy(first8.str8, key, (keylen < 8) ? keylen : 8); uint32_t usi = map->usage_end; uint32_t previ = map->prev_usage_end; /* 8byte: Save data ptr */ simap_ptr64 *data = (simap_ptr64 *)((uint8_t *)(map->base) + usi); data->ptr = ptr; /* 8byte: Save link to previous and next */ uint32_t *usprev = (uint32_t *)((uint8_t *)(map->base) + usi + sizeof(simap_ptr64)); *usprev = previ; *(usprev + 1) = (uint32_t) -1; /* 8byte: First 8 char */ simap_c64 *start_str = (simap_c64 *)(usprev + 2); *start_str = first8; /* Remaining bytes */ if(keylen >= 8) { /* uint32_t key_remains = keylen - 8; */ char *rem_str = (char *)(start_str + 1); strcpy(rem_str, key + 8); } /* XXX: The "padding" gets automagically added by the movement of the arena here(by junk bytes)! */ /* Update previous with linkage */ if(SM_LIKELY(previ != (uint32_t)-1)) { uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ + sizeof(simap_ptr64) + sizeof(uint32_t)); *prevnex = usi; } /* Update prev usage end */ map->prev_usage_end = usi; /* Administer usage_end offset */ map->usage_end += storage_needed; return data; } /** * A simple map data structure that fulfills amap.h * * Operations: * * AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL. * AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns "ptr" if there is no data for the key. * AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL. * * @param amap_instance The instance we operate upon. * @param op Defines which operation the caller wants. * @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc. * @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr and the "nt found" ptr to return in get... * @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error. */ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) { simap_instance *map = (simap_instance *) amap_instance; if((op == AMAP_ERASE)) { map->prev_usage_end = (uint32_t) -1; map->usage_end = 0; return (void *)((uint8_t)(NULL) - 1L); } /* Search for the key - also needed for SET in order to "re-set" */ simap_ptr64 *found = simap_search_internal(map, key); if(op == AMAP_GET) { return found ? found->ptr : ptr; } else { assert(op == AMAP_SET); if((!found)) { /* Add as new */ return simap_force_add(map, key, ptr); } else { /* Just overwrite */ found->ptr = ptr; return (void *) found; } } assert(false); /* should be unreachable */ return ptr; } #endif /* SIMAP_H */