perf tests and smaller perf tunes + some experiments
This commit is contained in:
parent
c1b4b9e97b
commit
6c1adb1655
17
main.cpp
17
main.cpp
@ -2,6 +2,7 @@
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include "amap.h"
|
||||
#include "simap.h"
|
||||
#include "mapmap.hpp"
|
||||
@ -51,7 +52,17 @@ inline int *datastore(int i, bool create = false) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
void measure_speed(amap mapdo, void *map, int max_key) {
|
||||
/**
 * Measures how long it takes to insert max_key elements into a map and prints the result.
 *
 * Keys and values are taken from keystore(i) / datastore(i) for i in [0, max_key).
 *
 * @param mapdo   The amap dispatch function of the implementation under test.
 * @param map     Opaque pointer to the map instance that is handed to mapdo.
 * @param max_key Number of elements to insert.
 * @param what    Name of the implementation, only used in the printed report.
 */
void test_perf(amap mapdo, void *map, int max_key, const char *what) {
    /* steady_clock is monotonic; high_resolution_clock may alias the adjustable system clock */
    using perf_clock = std::chrono::steady_clock;

    const auto begin = perf_clock::now();
    for(int i = 0; i < max_key; ++i) {
        const char *key = keystore(i);
        int *data = datastore(i);
        mapdo(map, AMAP_SET, key, data);
    }
    const auto end = perf_clock::now();

    const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);

    /* Nanoseconds -> milliseconds for the report */
    printf("Insertion time for %d elements (%s): %.3f ms.\n", max_key, what, elapsed.count() * 1e-6);
}
|
||||
|
||||
void test_basics(amap mapdo, void *map) {
|
||||
@ -108,8 +119,8 @@ int main() {
|
||||
keystore(i, true);
|
||||
datastore(i, true);
|
||||
|
||||
measure_speed(simap, &si, i);
|
||||
measure_speed(mapmap, &mi, i);
|
||||
test_perf(simap, &si, i, "simap");
|
||||
test_perf(mapmap, &mi, i, "std::map");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
2
makefile
2
makefile
@ -2,3 +2,5 @@ debug:
|
||||
g++ main.cpp -g -Wall -o main
|
||||
release:
|
||||
g++ main.cpp -O2 -Wall -o main
|
||||
release-native:
|
||||
g++ main.cpp -march=native -O3 -Wall -o main
|
||||
|
39
simap.h
39
simap.h
@ -7,6 +7,25 @@
|
||||
#include "amap.h"
|
||||
#include "arena.h/arena.h"
|
||||
|
||||
/* Perf trickery */
|
||||
|
||||
/*
 * Compiler-specific helpers.
 *
 * SM_LIKELY / SM_UNLIKELY must always expand to an expression that yields the truth value
 * of their argument, because they are used directly as `if(SM_LIKELY(cond))` conditions.
 * MSVC does not provide __builtin_expect, so there the hints just pass the condition through;
 * no prefetch intrinsic is wired up for MSVC either, so SM_PREFETCH is a no-op expression.
 */
#ifdef _MSC_VER
#define SM_THREAD_LOCAL __declspec(thread)
#define SM_PREFETCH(x) ((void)0)
#define SM_LIKELY(x) (x)
#define SM_UNLIKELY(x) (x)
#define SM_NOINLINE __declspec(noinline)
#define SM_ALWAYS_INLINE __forceinline
#else
#define SM_THREAD_LOCAL __thread
#define SM_PREFETCH(x) __builtin_prefetch(x)
/* The !! normalizes to 0/1 so pointers and other non-integer conditions are accepted too */
#define SM_LIKELY(x) __builtin_expect(!!(x), 1)
#define SM_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define SM_NOINLINE __attribute__ ((noinline))
#define SM_ALWAYS_INLINE __attribute__ ((always_inline))
#endif
|
||||
|
||||
/**
|
||||
* A "peasantly" map data structure backed by arena.h - basically a toy data structure...
|
||||
*
|
||||
@ -85,13 +104,14 @@ static inline simap_ptr64 *simap_search_internal(simap_instance *map, const char
|
||||
/* Construct keyremains - might point to the \0 terminator only if smallkey or 8 bytes exactly */
|
||||
const char *keyremains = key + prefixlen;
|
||||
|
||||
/* TODO: Maybe this is buggy when we read past the end of our own data? */
|
||||
/* TODO: Maybe I should create separate function for fast-lookup returning "next" pointer from a pointer to autovectorize? */
|
||||
/* Lookup prefix (fast-key) - hopefully this gets vectorized (should be)!!! */
|
||||
uint64_t *base = map->base;
|
||||
uint64_t *tipp = map->base;
|
||||
for(uint32_t i = 0; i < map->usage_end / 8; ++i, ++tipp) {
|
||||
/* Fast lookup */
|
||||
if(*tipp == prefix.u64) {
|
||||
if((*tipp == prefix.u64)) {
|
||||
/* First check the remains of the string (only if needed) */
|
||||
if(!is_smallkey) {
|
||||
char *tippremains = (char *)((uint8_t *)tipp + sizeof(uint64_t));
|
||||
@ -153,11 +173,11 @@ static inline uint32_t simap_elem_storage_size(const char *key) {
|
||||
padding;
|
||||
}
|
||||
|
||||
/** Force-add the (key,value) to the end of the map */
|
||||
static inline void *simap_force_add_internal(simap_instance *map, const char *key, void *ptr) {
|
||||
/** Force-add the (key,value) to the end of the map. Use this if you prefill your map one-by-one and need speed */
|
||||
static inline void *simap_force_add(simap_instance *map, const char *key, void *ptr) {
|
||||
uint32_t storage_needed = simap_elem_storage_size(key);
|
||||
assert((storage_needed % 8) == 0);
|
||||
if(map->end - map->usage_end < storage_needed) {
|
||||
if(SM_UNLIKELY(map->end - map->usage_end < storage_needed)) {
|
||||
/* Need storage */
|
||||
aralloc(&(map->a),
|
||||
sizeof(uint8_t)/*esize*/,
|
||||
@ -201,7 +221,7 @@ static inline void *simap_force_add_internal(simap_instance *map, const char *ke
|
||||
/* XXX: The "padding" gets automagically added by the movement of the arena here(by junk bytes)! */
|
||||
|
||||
/* Update previous with linkage */
|
||||
if(previ != (uint32_t)-1) {
|
||||
if(SM_LIKELY(previ != (uint32_t)-1)) {
|
||||
uint32_t *prevnex = (uint32_t *)((uint8_t *)(map->base) + previ +
|
||||
sizeof(simap_ptr64) +
|
||||
sizeof(uint32_t));
|
||||
@ -235,7 +255,7 @@ static inline void *simap_force_add_internal(simap_instance *map, const char *ke
|
||||
static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void *ptr) {
|
||||
simap_instance *map = (simap_instance *) amap_instance;
|
||||
|
||||
if(op == AMAP_ERASE) {
|
||||
if((op == AMAP_ERASE)) {
|
||||
map->prev_usage_end = (uint32_t) -1;
|
||||
map->usage_end = 0;
|
||||
return (void *)((uint8_t)(NULL) - 1L);
|
||||
@ -249,12 +269,13 @@ static inline void* simap(void *amap_instance, AMAP_OP op, const char *key, void
|
||||
} else {
|
||||
assert(op == AMAP_SET);
|
||||
|
||||
if(found) {
|
||||
if((!found)) {
|
||||
/* Add as new */
|
||||
return simap_force_add(map, key, ptr);
|
||||
} else {
|
||||
/* Just overwrite */
|
||||
found->ptr = ptr;
|
||||
return (void *) found;
|
||||
} else {
|
||||
return simap_force_add_internal(map, key, ptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user