vmap find implementation - untested
This commit is contained in:
parent
673555fdfc
commit
c698fc55c3
113
vmap.h
113
vmap.h
@ -1,16 +1,16 @@
|
|||||||
#ifndef VMAP_H
|
#ifndef VMAP_H
|
||||||
#define VMAP_H
|
#define VMAP_H
|
||||||
/*
|
/*
|
||||||
* A virtual memory misusing flat-ish hashmap optimized with AVX2.
|
* A virtual memory misusing flat-ish hashmap optimized with AVX2 (if available at compilation).
|
||||||
*
|
*
|
||||||
* Structure
|
* Structure
|
||||||
*
|
*
|
||||||
* VMEM
|
* VMEM
|
||||||
* STRUCT
|
* STRUCT
|
||||||
* PRIVATE
|
* INTAPI
|
||||||
* UINTAPI
|
|
||||||
*/
|
*/
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
#include "simd_map_lane.h"
|
#include "simd_map_lane.h"
|
||||||
|
|
||||||
/* VMEM */
|
/* VMEM */
|
||||||
@ -51,12 +51,12 @@ struct vmap_find_res {
|
|||||||
uint32_t lane_abcd_next;
|
uint32_t lane_abcd_next;
|
||||||
/** Meta-data for continuation of the search. In-lane where we search from next time? */
|
/** Meta-data for continuation of the search. In-lane where we search from next time? */
|
||||||
int lane_next_begin;
|
int lane_next_begin;
|
||||||
|
/** Meta-data for continuation of the search. Last value of the most recently searched lane. */
|
||||||
|
uint32_t last_found_lane_val;
|
||||||
};
|
};
|
||||||
typedef struct simd_map_find_res simd_map_find_res;
|
typedef struct simd_map_find_res simd_map_find_res;
|
||||||
|
|
||||||
/* PRIVATE */
|
/* INTAPI */
|
||||||
|
|
||||||
/* UINTAPI */
|
|
||||||
|
|
||||||
static inline vmap create_vmap(uint32_t max_levels) {
|
static inline vmap create_vmap(uint32_t max_levels) {
|
||||||
vmap map{ NULL, 0, max_levels};
|
vmap map{ NULL, 0, max_levels};
|
||||||
@ -76,6 +76,7 @@ static inline vmap_find_res vmap_search_all_begin() {
|
|||||||
ret.level = 0;
|
ret.level = 0;
|
||||||
ret.lane_abcd_next = 0;
|
ret.lane_abcd_next = 0;
|
||||||
ret.lane_next_begin = 0;
|
ret.lane_next_begin = 0;
|
||||||
|
ret.last_found_lane_val = 0;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,6 +97,14 @@ static inline vmap_find_res search_all_vmap(vmap *map, uint32_t key, vmap_find_r
|
|||||||
uint32_t level = prev.level;
|
uint32_t level = prev.level;
|
||||||
/* Probably the loop always exits without this predicate being false */
|
/* Probably the loop always exits without this predicate being false */
|
||||||
while(level <= map->max_levels) {
|
while(level <= map->max_levels) {
|
||||||
|
/* Rare edge-case when last lane element was returned and we continue from it */
|
||||||
|
if(prev.lane_abcd_next > 4) {
|
||||||
|
prev = vmap_search_all_begin();
|
||||||
|
++level;
|
||||||
|
/* prev.level = level; // unnecessary, I hand-optimized out */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* Process 8 bits of the 32-bit circular order - so it's not radix, but similar */
|
/* Process 8 bits of the 32-bit circular order - so it's not radix, but similar */
|
||||||
uint32_t byt = level % 4;
|
uint32_t byt = level % 4;
|
||||||
// Low 4 bits: page
|
// Low 4 bits: page
|
||||||
@ -104,15 +113,11 @@ static inline vmap_find_res search_all_vmap(vmap *map, uint32_t key, vmap_find_r
|
|||||||
uint32_t page_offset = 1024 * page_no;
|
uint32_t page_offset = 1024 * page_no;
|
||||||
|
|
||||||
/* Top 4 bits: lane. There are 32 lane start positions in the 4k page */
|
/* Top 4 bits: lane. There are 32 lane start positions in the 4k page */
|
||||||
uint32_t lane_no = (key >> (byt * 8 + 4)) && 15;
|
uint32_t lane_no = (key >> (byt * 8 + 4)) && 15
|
||||||
|
+ prev.lane_abcd_next; /* continuations start where we left off */
|
||||||
/* But 4096 / 4 == 1024 elements, which then divided by 16 == 64 uint32_t elems */
|
/* But 4096 / 4 == 1024 elements, which then divided by 16 == 64 uint32_t elems */
|
||||||
uint32_t lane_offset = lane_no * 64;
|
uint32_t lane_offset = lane_no * 64;
|
||||||
|
|
||||||
// FIXME: Rethink what is needed for continuations!
|
|
||||||
// I think we should store A, B, C and D lane retvals plus where we are
|
|
||||||
// or maybe just the "where we are" and figure out with logic here,
|
|
||||||
// but maybe I just need to save flags (4x1 bytes) for "is a lane-ABCD search needed?" as that is faster to simd branch pred?
|
|
||||||
|
|
||||||
/* A lane has 8x32 bit keys, then 8x32 bit values. 16 uint32_t elems. */
|
/* A lane has 8x32 bit keys, then 8x32 bit values. 16 uint32_t elems. */
|
||||||
/* So grab the A, B, C and D candidate lanes for each lane_offset. */
|
/* So grab the A, B, C and D candidate lanes for each lane_offset. */
|
||||||
simd_map_lane *lane_a = (simd_map_lane *) map->data + page_offset + lane_offset;
|
simd_map_lane *lane_a = (simd_map_lane *) map->data + page_offset + lane_offset;
|
||||||
@ -120,28 +125,94 @@ static inline vmap_find_res search_all_vmap(vmap *map, uint32_t key, vmap_find_r
|
|||||||
simd_map_lane *lane_c = lane_b + 1;
|
simd_map_lane *lane_c = lane_b + 1;
|
||||||
simd_map_lane *lane_d = lane_c + 1;
|
simd_map_lane *lane_d = lane_c + 1;
|
||||||
|
|
||||||
|
/* Get which lane we should begin at where */
|
||||||
|
uint32_t lane_a_begin = prev.lane_next_begin;
|
||||||
|
int lane_next_begin = 0;
|
||||||
|
|
||||||
/* Further lanes only needed if ours is fully filled */
|
/* Further lanes only needed if ours is fully filled */
|
||||||
/* Overlay simd and integer units here for perf */
|
/* Overlay simd and integer units here for perf */
|
||||||
uint32_t *afind = simd_map_lane_find(
|
uint32_t *afind = simd_map_lane_find(
|
||||||
lane_a,
|
lane_a,
|
||||||
key,
|
key,
|
||||||
0, /* lane modulo: 0 means until lane end */
|
0, /* lane modulo: 0 means until lane end */
|
||||||
0, /* FIXME - from continuation! */
|
lane_a_begin,
|
||||||
NULL); /* FIXME - we should fill a *lane_next_begin ptr here */
|
&lane_next_begin);
|
||||||
uint32_t bneed = simd_map_lane_last_value(lane_a);
|
uint32_t lasta = simd_map_lane_last_value(lane_a);
|
||||||
|
char bneed = (lasta != 0) && (prev.lane_abcd_next < 3);
|
||||||
if(afind) {
|
if(afind) {
|
||||||
|
ret.lane_next_begin = lane_next_begin;
|
||||||
|
ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0);
|
||||||
ret.value_location = afind;
|
ret.value_location = afind;
|
||||||
ret.level = level;
|
ret.level = level;
|
||||||
|
ret.last_found_lane_val = lasta;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
if(bneed) {
|
||||||
|
uint32_t *bfind = simd_map_lane_find(
|
||||||
|
lane_b,
|
||||||
|
key,
|
||||||
|
0, /* lane modulo: 0 means until lane end */
|
||||||
|
0, /* non-a lanes all start from 0 */
|
||||||
|
&lane_next_begin);
|
||||||
|
uint32_t lastb = simd_map_lane_last_value(lane_b);
|
||||||
|
char cneed = (lastb != 0) && (prev.lane_abcd_next < 2);
|
||||||
|
if(bfind) {
|
||||||
|
ret.lane_next_begin = lane_next_begin;
|
||||||
|
ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0);
|
||||||
|
ret.value_location = bfind;
|
||||||
|
ret.level = level;
|
||||||
|
ret.last_found_lane_val = lastb;
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
/* TODO: Implement B, C and D */
|
|
||||||
|
|
||||||
uint32_t cneed = simd_map_lane_last_value(lane_b);
|
if(cneed) {
|
||||||
uint32_t dneed = simd_map_lane_last_value(lane_c);
|
uint32_t *cfind = simd_map_lane_find(
|
||||||
|
lane_c,
|
||||||
|
key,
|
||||||
|
0, /* lane modulo: 0 means until lane end */
|
||||||
|
0, /* non-a lanes all start from 0 */
|
||||||
|
&lane_next_begin);
|
||||||
|
uint32_t lastc = simd_map_lane_last_value(lane_c);
|
||||||
|
char dneed = (lastc != 0) && (prev.lane_abcd_next < 1);
|
||||||
|
if(cfind) {
|
||||||
|
ret.lane_next_begin = lane_next_begin;
|
||||||
|
ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0);
|
||||||
|
ret.value_location = cfind;
|
||||||
|
ret.level = level;
|
||||||
|
ret.last_found_lane_val = lastc;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/* Check if we need to jump to the next level and do */
|
if(dneed) {
|
||||||
uint32_t more = simd_map_lane_last_value(lane_c);
|
uint32_t *dfind = simd_map_lane_find(
|
||||||
if(!more) return ret;
|
lane_d,
|
||||||
|
key,
|
||||||
|
0, /* lane modulo: 0 means until lane end */
|
||||||
|
0, /* non-a lanes all start from 0 */
|
||||||
|
&lane_next_begin);
|
||||||
|
uint32_t lastd = simd_map_lane_last_value(lane_d);
|
||||||
|
char next_level = (lastd != 0);
|
||||||
|
if(dfind) {
|
||||||
|
ret.lane_next_begin = lane_next_begin;
|
||||||
|
ret.lane_abcd_next = prev.lane_abcd_next + (lane_next_begin == 0);
|
||||||
|
ret.value_location = dfind;
|
||||||
|
ret.level = level;
|
||||||
|
ret.last_found_lane_val = lastd;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check to avoid next level (stop iteration) */
|
||||||
|
if(!next_level) {
|
||||||
|
return vmap_search_all_begin();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Next level needs checking */
|
||||||
|
prev = vmap_search_all_begin();
|
||||||
++level;
|
++level;
|
||||||
|
/* prev.level = level; // unnecessary, I hand-optimized out */
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user