some initial progress

This commit is contained in:
Richard Thier 2024-09-29 20:52:18 +02:00
parent 92b03738f5
commit 67e2652363
4 changed files with 237 additions and 0 deletions

25
amap.h Normal file
View File

@ -0,0 +1,25 @@
#ifndef AMAP_H
#define AMAP_H
/**
 * Operations possible on the abstract map.
 *
 * Setting a key to NULL should lead to a later get returning NULL
 * (the implementation is allowed to tombstone the entry for this).
 */
typedef enum AMAP_OP {
    AMAP_SET = 0,
    AMAP_GET = 1,
    AMAP_ERASE = 2
} AMAP_OP;
/**
 * Function-pointer abstraction for an abstract map data type.
 *
 * One entry point serves every operation; the `op` argument selects which one.
 *
 * Operations:
 *
 * AMAP_SET   Saves a mapping from key->ptr in the map. ptr==NULL "tombstones" the mapping so it returns NULL. Returns NULL on error!
 * AMAP_GET   Gets the ptr stored at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
 * AMAP_ERASE Erases the map (symbol table) so it becomes empty again. Can never fail, returns NULL.
 *
 * @param amap_instance The instance we operate upon.
 * @param op Defines which operation the caller wants.
 * @param key The key (both for SET and GET). This pointer can easily get invalidated, so an implementation might need its own copy (or store it in a trie, etc.).
 * @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr.
 * @returns The ptr / data stored for the key, or NULL on tombstone, or when nothing is stored yet, or when op is SET and there was an error.
 */
typedef void* (*amap)(void *amap_instance, AMAP_OP op, const char *key, void *ptr);
#endif /* AMAP_H */

8
main.cpp Normal file
View File

@ -0,0 +1,8 @@
#include <cstdio>
#include "amap.h"
#include "simap.h"
/* Program entry point: currently a placeholder that does no work and exits with status 0. */
int main() {
return 0;
}

4
makefile Normal file
View File

@ -0,0 +1,4 @@
# Both targets compile main.cpp into ./main. They name build flavours, not
# files, so they are declared phony: a stray file called "debug" or "release"
# must never make the build look up to date.
.PHONY: debug release

# Unoptimised build with debug symbols.
debug:
	g++ main.cpp -g -Wall -o main

# Optimised build.
release:
	g++ main.cpp -O2 -Wall -o main

200
simap.h Normal file
View File

@ -0,0 +1,200 @@
#ifndef SIMAP_H
#define SIMAP_H
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "amap.h"
#include "arena.h/arena.h"
/**
 * Storage slot for a stored value pointer.
 *
 * Overlaying the pointer with a uint64_t ensures the slot takes (at least)
 * 8 bytes, i.e. pointers narrower than 64 bits get padded.
 */
typedef union simap_ptr64 {
    void *ptr;
    uint64_t u64;
} simap_ptr64;
/**
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
*
* This is very simple, no trees, no hashes, just (hopefully) autovectorized linear lookup.
* Inserting NULLs to keys happens through tombstoning unless erase happens and we never
* shrink memory so please do not add a lot of things then remove a lot of things.
*
* We only use heuristics to tell stored data apart from a key, so a lookup is not "sure" and can fail...
*
* XXX: So beware that this can FAIL - it just "statistically most often works"!
*
* The memory layout starting at *base is as follows:
*
* 8 byte:
* - void* value;
* - ? padding (only for non-64 bit pointer machines)
*
* 8 byte:
* - uint32_t previndex;
* - uint32_t nextindex;
*
* K x 8 byte:
* - char name[]; // inline stored
* - padding (divisible by 8)
*
* ELEMENTS added to it...
*
* Because of it a lookup is basically via strstr-like with 8byte steps!
* with few character names zero-padded in the search term parameter
* and if you want check extra validity by jumping back&forth in it.
*/
typedef struct simap_instance {
    /* Arena that provides the backing memory of the map */
    arena a;
    /* NOTE(review): presumably the end of the storage obtained so far, in the same unit as usage_end - confirm */
    uint32_t end;
    /* End of the used part of the storage - in bytes!!! */
    uint32_t usage_end;
    /* in bytes!!! */
    uint32_t next_previndex;
    /** see doc comment for layout and why uint64_t* is the type */
    uint64_t *base;
} simap_instance;
/**
 * Creates an empty map instance.
 *
 * A new arena is requested through newarena() with the argument 1 << 33
 * (presumably the reserved size in bytes - confirm against arena.h) and one
 * uint64_t is allocated from it right away. The map's base points just past
 * that first word, so the word directly before base is always allocated.
 *
 * @returns The initialized instance (by value) with all counters set to zero.
 */
static inline simap_instance simap_create() {
    simap_instance map;
    map.a = newarena((ptrdiff_t)1 << 33);

    /* addr divisible by 8 */
    uint64_t *first_word = (uint64_t*) alloc(&(map.a), sizeof(uint64_t), sizeof(uint64_t), 1);
    /* First really addressable thing */
    map.base = first_word + 1;

    map.next_previndex = 0;
    map.usage_end = 0;
    map.end = 0;
    return map;
}
/**
 * Eight key characters that can also be read as a single 64-bit word,
 * so that a key prefix can be compared with one integer comparison.
 */
union simap_c64 {
    char str8[8];
    uint64_t u64;
};
/* The typedef must name the union defined above, otherwise `simap_c64` is not a type name in C */
typedef union simap_c64 simap_c64;
/* Previously declared typedef name, kept as an alias so any existing reference still resolves */
typedef union simap_c64 simap_char64;
/**
 * Looks up the value slot that belongs to key by linearly scanning the used storage.
 *
 * The first (up to) 8 key bytes are packed zero-padded into one 64-bit word and
 * compared against every 8-byte word of the used storage. On a word match:
 *  - keys of 8 or more characters also have their remaining characters compared
 *    (strcmp) with the bytes right after the matching word,
 *  - then the hit is validated by "jumping back & forth": the uint32_t stored in
 *    the word before the match is taken as the byte offset of the previous
 *    element, and that element's next-index must lead back to the matching word.
 *
 * This is only a statistical check - see the layout comment on simap_instance.
 *
 * @param map The map to search in.
 * @param key Zero terminated key to look for (must not be NULL).
 * @returns Pointer to the value slot of the element, or NULL when nothing was found.
 */
static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char *key) {
    /* Construct prefix (fast-key): first 8 bytes of the key, zero-padded for short keys */
    size_t keylen = strlen(key);
    char is_smallkey = (keylen < 8);
    size_t prefixlen = is_smallkey ? keylen : 8;
    simap_c64 prefix = {{0}};
    memcpy(prefix.str8, key, prefixlen);

    /* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */
    const char *keyremains = key + prefixlen;

    /* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */

    /* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
    uint8_t *bytes = (uint8_t *) map->base;
    uint64_t *tipp = map->base;
    uint32_t wordcount = map->usage_end / 8;
    for(uint32_t i = 0; i < wordcount; ++i, ++tipp) {
        /* Fast lookup */
        if(*tipp != prefix.u64) {
            continue;
        }

        /* First check the remains of the string (only if needed) */
        if(!is_smallkey) {
            const char *tippremains = (const char *)(tipp + 1);
            if(strcmp(keyremains, tippremains) != 0) {
                continue;
            }
        }

        /* Check back & forth (jump validation) */

        /* previndex is the first uint32_t of the word before the name; memcpy avoids aliasing / alignment problems */
        uint32_t previ;
        memcpy(&previ, tipp - 1, sizeof(previ));

        /* Where the previous element keeps its nextindex: after its value slot and its own previndex */
        uint64_t link_offset = (uint64_t) previ + sizeof(simap_ptr64) + sizeof(uint32_t);
        if(link_offset + sizeof(uint32_t) > map->usage_end) {
            /* A false match in data can yield a garbage index - never read outside the used storage */
            continue;
        }
        uint32_t prevnexi;
        memcpy(&prevnexi, bytes + link_offset, sizeof(prevnexi));

        /* The element found this way must have its name exactly at the word we matched */
        uint64_t name_offset = (uint64_t) prevnexi + sizeof(simap_ptr64)
                             + sizeof(uint32_t) + sizeof(uint32_t);
        if(name_offset != (uint64_t) i * sizeof(uint64_t)) {
            continue;
        }

        /* Can have the (statistically checked) pointer */
        return (simap_ptr64 *)(bytes + prevnexi);
    }

    /* Haven't found anything */
    return NULL;
}
/**
 * Gets the number of padding bytes needed to pad size up to a multiple of alignment.
 *
 * @param size The unpadded size in bytes.
 * @param alignment The size should become divisible by this (0 is treated as "no alignment").
 * @returns Only the bytes to ADD (0 when size is already aligned), not the padded size.
 */
static inline unsigned int get_size_padding(unsigned int size, unsigned int alignment) {
    if(alignment == 0) {
        /* Avoid the division by zero below - nothing to align to */
        return 0;
    }
    unsigned int remainder = size % alignment;
    return (remainder == 0) ? 0 : (alignment - remainder);
}
/**
 * Returns the size of the storage needed for the given key.
 *
 * An element consists of the value slot (simap_ptr64), the previndex and
 * nextindex (uint32_t each) and the inline stored name. The name part is the
 * key INCLUDING its '\0' terminator, rounded up to whole 8-byte words:
 *  - simap_search_internal() compares whole zero-padded 8-byte words, so even
 *    a short (or empty) key needs one full word,
 *  - for keys of 8 or more characters it uses strcmp() on the stored remainder,
 *    which therefore has to be terminated inside the element.
 *
 * @param key Zero terminated key (must not be NULL).
 * @returns The element size in bytes - always a multiple of 8.
 */
static inline uint32_t simap_elem_storage_size(const char *key) {
    uint32_t keysize = (uint32_t) strlen(key);
    /* key + terminator, rounded up to the next multiple of 8 */
    uint32_t namesize = (keysize + 1 + 7) / 8 * 8;
    uint32_t headersize = (uint32_t)(sizeof(simap_ptr64) +
                                     sizeof(uint32_t) +
                                     sizeof(uint32_t));
    return headersize + namesize;
}
/**
 * Force-add the (key,value) to the end of the map.
 *
 * NOT IMPLEMENTED YET: currently only the storage requirement is calculated
 * and NULL is returned in every case, so callers see an "error".
 *
 * @param map The map to add to.
 * @param key Zero terminated key of the new element.
 * @param ptr The value to store for the key.
 * @returns NULL (for now always).
 */
static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
    uint32_t storage_needed = simap_elem_storage_size(key);
    /* Bytes still free; assumes end and usage_end use the same unit - TODO confirm */
    uint32_t storage_free = map->end - map->usage_end;
    if(storage_free < storage_needed) {
        /* Need storage: what is left is smaller than what the element takes */
        /* TODO: Implement */
        // Soemthin' like = ((T*) alloc(&a, sizeof(T), sizeof(T), 1)) + 1;
    }
    /* Already have the storage */
    /* TODO: Implement - beware that I NEED to store the first 8 characters as a simap_c64! */
    (void) ptr; /* unused until the store is implemented */
    return NULL;
}
/**
 * A simple map data structure that fulfills amap.h
 *
 * Operations:
 *
 * AMAP_SET   Saves a mapping from key->ptr in map. ptr==NULL "tombstones" the mapping to return NULL.
 *            For an already stored key the value is overwritten in place and a non-NULL pointer
 *            (the address of the value slot) is returned; new keys go through
 *            simap_force_add_internal() and its result is returned (NULL on error).
 * AMAP_GET   Gets the symbol at key (the ptr parameter is unused). Returns NULL if there is no ptr for the key.
 * AMAP_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
 *
 * @param amap_instance The instance we operate upon (a simap_instance*).
 * @param op Defines which operation the caller wants.
 * @param key The key (both for SET and GET). This pointer can get easily invalidated so you might need a copy or you do Trie, etc.
 * @param ptr When adding a ptr (data) to the map / table, the key will point to this ptr.
 * @returns The ptr / data stored for the key, or NULL on tombstone or when not stored yet or op is SET and there was an error.
 */
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
    simap_instance *map = (simap_instance *) amap_instance;

    if(op == AMAP_ERASE) {
        /* Storage is kept, only marked unused. NOTE(review): next_previndex is not reset here - confirm that is intended */
        map->usage_end = 0;
        return NULL;
    }

    /* Search for the key - also needed for SET in order to "re-set" */
    simap_ptr64 *found = simap_search_internal(map, key);

    if(op == AMAP_GET) {
        /* A key that was never stored has no slot: report NULL instead of dereferencing NULL */
        return found ? found->ptr : NULL;
    }

    assert(op == AMAP_SET);
    if(found) {
        /* Just overwrite */
        found->ptr = ptr;
        return (void *) found;
    }

    /* Not stored yet: append as a new element (NULL when that fails) */
    return simap_force_add_internal(map, key, ptr);
}
#endif /* SIMAP_H */