some initial progress
This commit is contained in:
parent
92b03738f5
commit
67e2652363
25
amap.h
Normal file
25
amap.h
Normal file
@ -0,0 +1,25 @@
|
||||
#ifndef AMAP_H
|
||||
#define AMAP_H
|
||||
|
||||
/**
 * Operations that can be requested from an abstract map.
 *
 * Storing NULL for a key must make a later lookup of that key yield NULL;
 * an implementation is free to realise this by tombstoning the entry.
 */
typedef enum AMAP_OP {
    AMAP_SET = 0,
    AMAP_GET = 1,
    AMAP_ERASE = 2
} AMAP_OP;
|
||||
|
||||
/**
|
||||
* Function-abstraction for an abstract map data type.
|
||||
*
|
||||
* Operations:
|
||||
*
|
||||
* AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL. Returns null on error!
|
||||
* AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
|
||||
* AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
|
||||
*
|
||||
* @param amap_instance The instance we operate upon.
|
||||
* @param op Defines which operation the caller wants.
|
||||
* @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc.
|
||||
* @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr.
|
||||
* @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error.
|
||||
*/
|
||||
typedef void* (*amap)(void *amap_instance, AMAP_OP op, const char *key, void *ptr);
|
||||
|
||||
#endif /* AMAP_H */
|
8
main.cpp
Normal file
8
main.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include <cstdio>
|
||||
#include "amap.h"
|
||||
#include "simap.h"
|
||||
|
||||
|
||||
/** Program entry point: nothing is wired up yet, so it only reports success. */
int main()
{
    return 0;
}
|
4
makefile
Normal file
4
makefile
Normal file
@ -0,0 +1,4 @@
|
||||
# Both targets are commands, not files: declare them phony so that a stray
# file named "debug" or "release" can never make them look up to date.
.PHONY: debug release

# Unoptimised build with debug symbols.
debug:
	g++ main.cpp -g -Wall -o main

# Optimised build.
release:
	g++ main.cpp -O2 -Wall -o main
|
200
simap.h
Normal file
200
simap.h
Normal file
@ -0,0 +1,200 @@
|
||||
#ifndef SIMAP_H
|
||||
#define SIMAP_H
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "amap.h"
|
||||
#include "arena.h/arena.h"
|
||||
|
||||
/**
 * Value slot of a map element.
 *
 * Overlaying the pointer with a uint64_t guarantees the slot occupies 8 bytes of
 * storage even where pointers are narrower (the remainder is padding).
 */
typedef union simap_ptr64 {
    void *ptr;
    uint64_t u64;
} simap_ptr64;
|
||||
|
||||
/**
|
||||
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
|
||||
*
|
||||
* This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup.
|
||||
* Inserting NULLs to keys happens through tombstoning unless erase happens and we never
|
||||
* shrink memory so please do not add a lot of things then remove a lot of things.
|
||||
*
|
||||
* We also only do heuristics against data being the key so its not "sure" and can fail...
|
||||
*
|
||||
* XXX: So beware that this can FAIL just "statistically most often works"!
|
||||
*
|
||||
* The memory layout after at *base is as follows:
|
||||
*
|
||||
* 8 byte:
|
||||
* - void* value;
|
||||
* - ? padding (only for non-64 bit pointer machines)
|
||||
*
|
||||
* 8 byte:
|
||||
* - uint32_t previndex;
|
||||
* - uint32_t nextindex;
|
||||
*
|
||||
* K x 8 byte:
|
||||
* - char name[]; // inline stored
|
||||
* - padding (divisible by 8)
|
||||
*
|
||||
* ELEMENTS added to it...
|
||||
*
|
||||
* Because of it a lookup is basically via strstr-like with 8byte steps!
|
||||
* with few character names zero-padded in the search term parameter
|
||||
* and if you want check extra validity by jumping back&forth in it.
|
||||
*/
|
||||
struct simap_instance {
|
||||
arena a;
|
||||
uint32_t end;
|
||||
uint32_t usage_end; /* in bytes!!! */
|
||||
uint32_t next_previndex; /* in bytes!!! */
|
||||
/** see doc comment for layout and why uint64_t* is the type */
|
||||
uint64_t *base;
|
||||
};
|
||||
typedef struct simap_instance simap_instance;
|
||||
|
||||
static inline simap_instance simap_create() {
|
||||
simap_instance ret;
|
||||
ret.a = newarena((ptrdiff_t)1 << 33);
|
||||
ret.end = 0;
|
||||
ret.usage_end = 0;
|
||||
ret.next_previndex = 0;
|
||||
ret.base = ((uint64_t*) alloc(&(ret.a), sizeof(uint64_t), sizeof(uint64_t), 1)) /* addr divisible by 8 */
|
||||
+ 1; /* First really addressible thing */
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Eight key characters viewed either as bytes or as one 64-bit word, so that a
 * key prefix can be compared with a single integer comparison.
 */
union simap_c64 {
    char str8[8];
    uint64_t u64;
};
/* The alias has to name the tag declared above: plain C has no `simap_c64` type otherwise. */
typedef union simap_c64 simap_c64;
/* Older alias name, kept so that existing references keep compiling. */
typedef union simap_c64 simap_char64;
|
||||
|
||||
/**
 * Searches the map for `key` and returns the element's value slot.
 *
 * The first (up to) 8 key bytes are packed, zero padded, into one 64-bit word
 * which is compared against every 8-byte word of the used storage. On a word
 * match the rest of a long key is compared with strcmp, and finally the hit is
 * validated by following the candidate's previndex and the predecessor's
 * nextindex: the hit is accepted only if that round trip lands on the candidate.
 *
 * NOTE(review): previndex / nextindex of a candidate are used as byte offsets
 * without range checks, so a word that only looks like a key may send the
 * validation reads outside the used storage. The offsets cannot be bounded
 * here without knowing the invariants of the (not yet implemented) insert.
 *
 * @param map The map to search; map->base and map->usage_end (bytes) are read.
 * @param key NUL terminated key, must not be NULL.
 * @returns Pointer to the value slot of the matching element, or NULL when no element matched.
 */
static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
    /* Bytes in front of the name: value slot, then previndex, then nextindex */
    const size_t nextindex_offset = sizeof(simap_ptr64) + sizeof(uint32_t);
    const size_t name_offset = nextindex_offset + sizeof(uint32_t);

    /* Construct prefix (fast-key) */
    size_t keylen = strlen(key);
    int is_smallkey = (keylen < 8);
    size_t prefixlen = is_smallkey ? keylen : 8;

    /* `= {{0}}` is valid in C and C++ alike; the zero fill also pads short keys */
    union simap_c64 prefix = {{0}};
    memcpy(prefix.str8, key, prefixlen);

    /* Construct keyremains - points at the '\0' terminator only if smallkey or 8 bytes exactly */
    const char *keyremains = key + prefixlen;

    /* TODO: Maybe a separate fast-lookup function returning the "next" pointer would autovectorize better? */
    /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
    uint8_t *base_bytes = (uint8_t *)map->base;
    uint32_t word_count = map->usage_end / 8;
    uint64_t *tipp = map->base;
    for (uint32_t i = 0; i < word_count; ++i, ++tipp) {
        /* Fast lookup */
        if (*tipp != prefix.u64) {
            continue;
        }

        /* First check the remains of the string (only if needed) */
        if (!is_smallkey) {
            const char *tippremains = (const char *)(tipp + 1);
            if (strcmp(keyremains, tippremains) != 0) {
                continue;
            }
        }

        /* Check back & forth (jump validation); memcpy keeps the 32-bit loads alias-safe */
        uint32_t previ;
        memcpy(&previ, tipp - 1, sizeof previ);

        uint32_t prevnexi;
        memcpy(&prevnexi, base_bytes + previ + nextindex_offset, sizeof prevnexi);

        /* Compare as byte addresses so no misaligned uint64_t* is formed from a bogus offset */
        if (base_bytes + prevnexi + name_offset != (uint8_t *)tipp) {
            continue;
        }

        /* Can have the (statistically checked) pointer */
        return (simap_ptr64 *)(base_bytes + prevnexi);
    }

    /* Haven't found anything */
    return NULL;
}
|
||||
|
||||
/**
 * Gets the number of padding bytes needed to round `size` up to a multiple of `alignment`.
 *
 * @param size The unpadded size in bytes.
 * @param alignment The granularity to pad to; 0 is treated as "no alignment requested".
 * @returns The padding in the range [0, alignment), i.e. size + padding is divisible by alignment.
 */
static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) {
    if (alignment == 0) {
        /* Nothing to align to - and avoids a division by zero below */
        return 0;
    }

    unsigned int overhang = size % alignment;
    return (overhang == 0) ? 0 : alignment - overhang;
}
|
||||
|
||||
/** Returns the size of the storage needed for the given key */
|
||||
static inline uint32_t simap_elem_storage_size(const char *key) {
|
||||
uint32_t keysize = strlen(key);
|
||||
uint32_t padding = get_size_padding(keysize, 8);
|
||||
|
||||
return keysize +
|
||||
sizeof(simap_ptr64) +
|
||||
sizeof(uint32_t) +
|
||||
sizeof(uint32_t) +
|
||||
padding;
|
||||
}
|
||||
|
||||
/** Force-add the (key,value) to the end of the map */
|
||||
static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
|
||||
uint32_t storage_needed = simap_elem_storage_size(key);
|
||||
if(map->end - map->usage_end > storage_needed) {
|
||||
/* Need storage */
|
||||
/* TODO: Implement */
|
||||
// Soemthin' like = ((T*) alloc(&a, sizeof(T), sizeof(T), 1)) + 1;
|
||||
}
|
||||
|
||||
/* Already have the storage */
|
||||
/* TODO: Implement - beware that I NEED to store the first 8 characters as a simap_c64! */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple map data structure that fulfills amap.h
|
||||
*
|
||||
* Operations:
|
||||
*
|
||||
* AMAP_SET Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL.
|
||||
* AMAP_GET Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
|
||||
* AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
|
||||
*
|
||||
* @param amap_instance The instance we operate upon.
|
||||
* @param op Defines which operation the caller wants.
|
||||
* @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc.
|
||||
* @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr.
|
||||
* @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error.
|
||||
*/
|
||||
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
|
||||
simap_instance *map = (simap_instance *) amap_instance;
|
||||
|
||||
if(op == AMAP_ERASE) {
|
||||
map->usage_end = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Search for the key - also needed for SET in order to "re-set" */
|
||||
simap_ptr64 *found = simap_search_internal(map, key);
|
||||
|
||||
if(op == AMAP_GET) {
|
||||
return found->ptr;
|
||||
} else {
|
||||
assert(op == AMAP_SET);
|
||||
|
||||
if(found) {
|
||||
/* Just overwrite */
|
||||
found->ptr = ptr;
|
||||
return (void *) found;
|
||||
} else {
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif /* SIMAP_H */
|
Loading…
x
Reference in New Issue
Block a user