perf tests and smaller perf tunes + some experiments
This commit is contained in:
parent
c1b4b9e97b
commit
6c1adb1655
17
main.cpp
17
main.cpp
@ -2,6 +2,7 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <chrono>
|
||||||
#include "amap.h"
|
#include "amap.h"
|
||||||
#include "simap.h"
|
#include "simap.h"
|
||||||
#include "mapmap.hpp"
|
#include "mapmap.hpp"
|
||||||
@ -51,7 +52,17 @@ inline int *datastore(int i, bool create = false) noexcept {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void measure_speed(amap mapdo, void *map, int max_key) {
|
void test_perf(amap mapdo, void *map, int max_key, const char *what) {
|
||||||
|
auto begin = std::chrono::high_resolution_clock::now();
|
||||||
|
for(int i = 0; i < max_key; ++i) {
|
||||||
|
const char *key = keystore(i);
|
||||||
|
int *data = datastore(i);
|
||||||
|
mapdo(map, AMAP_SET, key, data);
|
||||||
|
}
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||||
|
|
||||||
|
printf("Insertion time for %d elements (%s): %.3f ms.\n", max_key, what, elapsed.count() * 1e-6);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_basics(amap mapdo, void *map) {
|
void test_basics(amap mapdo, void *map) {
|
||||||
@ -108,8 +119,8 @@ int main() {
|
|||||||
keystore(i, true);
|
keystore(i, true);
|
||||||
datastore(i, true);
|
datastore(i, true);
|
||||||
|
|
||||||
measure_speed(simap, &si, i);
|
test_perf(simap, &si, i, "simap");
|
||||||
measure_speed(mapmap, &mi, i);
|
test_perf(mapmap, &mi, i, "std::map");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
2
makefile
2
makefile
@ -2,3 +2,5 @@ debug:
|
|||||||
g++ main.cpp -g -Wall -o main
|
g++ main.cpp -g -Wall -o main
|
||||||
release:
|
release:
|
||||||
g++ main.cpp -O2 -Wall -o main
|
g++ main.cpp -O2 -Wall -o main
|
||||||
|
release-native:
|
||||||
|
g++ main.cpp -march=native -O3 -Wall -o main
|
||||||
|
39
simap.h
39
simap.h
@ -7,6 +7,25 @@
|
|||||||
#include "amap.h"
|
#include "amap.h"
|
||||||
#include "arena.h/arena.h"
|
#include "arena.h/arena.h"
|
||||||
|
|
||||||
|
/* Perf trickery */
|
||||||
|
|
||||||
|
/* I have no idea what MSVC has instead... */
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define SM_THREAD_LOCAL __declspec(thread)
|
||||||
|
#define SM_PREFETCH(x)
|
||||||
|
/* No branch-prediction hint on this compiler: the macro must still expand to
 * the (boolean-normalised) condition, otherwise `if(SM_LIKELY(c))` turns into
 * the ill-formed `if()`. */
#define SM_LIKELY(x) (!!(x))
|
||||||
|
/* No branch-prediction hint on this compiler: the macro must still expand to
 * the (boolean-normalised) condition, otherwise `if(SM_UNLIKELY(c))` turns
 * into the ill-formed `if()`. */
#define SM_UNLIKELY(x) (!!(x))
|
||||||
|
#define SM_NOINLINE __declspec(noinline)
|
||||||
|
#define SM_ALWAYS_INLINE __forceinline
|
||||||
|
#else
|
||||||
|
#define SM_THREAD_LOCAL __thread
|
||||||
|
#define SM_PREFETCH(x) __builtin_prefetch(x)
|
||||||
|
/* Hint that (x) is expected to be true. `!!` normalises the condition to 0/1
 * first: __builtin_expect takes a `long`, so a raw pointer would not convert
 * in C++ and a 64-bit value could be truncated where `long` is 32 bits. */
#define SM_LIKELY(x) __builtin_expect(!!(x), 1)
|
||||||
|
/* Hint that (x) is expected to be false. `!!` normalises the condition to 0/1
 * first: __builtin_expect takes a `long`, so a raw pointer would not convert
 * in C++ and a 64-bit value could be truncated where `long` is 32 bits. */
#define SM_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||||
|
#define SM_NOINLINE __attribute__ ((noinline))
|
||||||
|
#define SM_ALWAYS_INLINE __attribute__ ((always_inline))
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
|
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
|
||||||
*
|
*
|
||||||
@ -85,13 +104,14 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char
|
|||||||
/* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */
|
/* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */
|
||||||
const char *keyremains = key + prefixlen;
|
const char *keyremains = key + prefixlen;
|
||||||
|
|
||||||
|
/* TODO: Maybe this is buggy when we access behind our own data? */
|
||||||
/* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */
|
/* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */
|
||||||
/* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
|
/* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
|
||||||
uint64_t *base = map->base;
|
uint64_t *base = map->base;
|
||||||
uint64_t *tipp = map->base;
|
uint64_t *tipp = map->base;
|
||||||
for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) {
|
for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) {
|
||||||
/* Fast lookup */
|
/* Fast lookup */
|
||||||
if(*tipp == prefix.u64) {
|
if((*tipp == prefix.u64)) {
|
||||||
/* First check the remains of the string (only if needed) */
|
/* First check the remains of the string (only if needed) */
|
||||||
if(!is_smallkey) {
|
if(!is_smallkey) {
|
||||||
char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
|
char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
|
||||||
@ -153,11 +173,11 @@ static inline uint32_t simap_elem_storage_size(const char *key) {
|
|||||||
padding;
|
padding;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Force-add the (key,value) to the end of the map */
|
/** Force-add the (key,value) to the end of the map. Use this if you prefill your map one-by-one and need speed */
|
||||||
static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
|
static inline void *simap_force_add(simap_instance *map, const char *key, void *ptr) {
|
||||||
uint32_t storage_needed = simap_elem_storage_size(key);
|
uint32_t storage_needed = simap_elem_storage_size(key);
|
||||||
assert((storage_needed % 8) == 0);
|
assert((storage_needed % 8) == 0);
|
||||||
if(map->end - map->usage_end < storage_needed) {
|
if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) {
|
||||||
/* Need storage */
|
/* Need storage */
|
||||||
aralloc(&(map->a),
|
aralloc(&(map->a),
|
||||||
sizeof(uint8_t)/*esize*/,
|
sizeof(uint8_t)/*esize*/,
|
||||||
@ -201,7 +221,7 @@ static inline void *simap_force_add_internal(simap_instance *map, const char *ke
|
|||||||
/* XXX: The "padding" gets automagically added by the movement of the arena here(by junk bytes)! */
|
/* XXX: The "padding" gets automagically added by the movement of the arena here(by junk bytes)! */
|
||||||
|
|
||||||
/* Update previous with linkage */
|
/* Update previous with linkage */
|
||||||
if(previ != (uint32_t)-1) {
|
if(SM_LIKELY(previ != (uint32_t)-1)) {
|
||||||
uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ +
|
uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ +
|
||||||
sizeof(simap_ptr64) +
|
sizeof(simap_ptr64) +
|
||||||
sizeof(uint32_t));
|
sizeof(uint32_t));
|
||||||
@ -235,7 +255,7 @@ static inline void *simap_force_add_internal(simap_instance *map, const char *ke
|
|||||||
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
|
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
|
||||||
simap_instance *map = (simap_instance *) amap_instance;
|
simap_instance *map = (simap_instance *) amap_instance;
|
||||||
|
|
||||||
if(op == AMAP_ERASE) {
|
if((op == AMAP_ERASE)) {
|
||||||
map->prev_usage_end = (uint32_t) -1;
|
map->prev_usage_end = (uint32_t) -1;
|
||||||
map->usage_end = 0;
|
map->usage_end = 0;
|
||||||
return (void *)((uint8_t)(NULL) - 1L);
|
return (void *)((uint8_t)(NULL) - 1L);
|
||||||
@ -249,12 +269,13 @@ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void
|
|||||||
} else {
|
} else {
|
||||||
assert(op == AMAP_SET);
|
assert(op == AMAP_SET);
|
||||||
|
|
||||||
if(found) {
|
if((!found)) {
|
||||||
|
/* Add as new */
|
||||||
|
return simap_force_add(map, key, ptr);
|
||||||
|
} else {
|
||||||
/* Just overwrite */
|
/* Just overwrite */
|
||||||
found->ptr = ptr;
|
found->ptr = ptr;
|
||||||
return (void *) found;
|
return (void *) found;
|
||||||
} else {
|
|
||||||
return simap_force_add_internal(map, key, ptr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user