Compare commits

...

2 Commits

Author SHA1 Message Date
Richard Thier
e82468acf8 added simd_map_create_and_reserve 2024-10-23 21:30:30 +02:00
Richard Thier
968f95734d simd-map multimap search fix in the AVX2 code 2024-10-23 18:06:01 +02:00
3 changed files with 135 additions and 18 deletions

129
main.cpp
View File

@ -33,7 +33,46 @@ inline const char *keystore(int i, bool create = false) noexcept {
} }
/** /**
* Creates keys or returns the ith key. Used for performance tests. * Creates keys or returns the ith integer key. Used for performance tests.
*
* Rem.: Generated keys are like this: i, i-1, i-2, ... 1
*
* @param i When "create" is false, we return the ith key (does not check OOB)
* @param create When true, we initialize the keystore with keys generated from 0..i indices.
* @returns The ith key when create==false, otherwise undefined.
*/
inline int *int_keystore(int i, bool create = false) noexcept {
    /* Per-thread key storage shared by the "create" and "lookup" modes. */
    static thread_local std::vector<int> keys;

    if(!create) {
        /* Lookup mode: no bounds check, the caller must pass a valid index. */
        return &(keys[i]);
    }

    /* Create mode: drop the old keys and regenerate i, i-1, ..., 1. */
    keys.clear();
    if(i > 0) {
        /* Reserve once up front so the fill loop below never reallocates. */
        keys.reserve(static_cast<std::size_t>(i));
    }
    for(int j = 0; j < i; ++j) {
        keys.push_back(i - j);
    }
    /* The return value carries no meaning in create mode. */
    return nullptr;
}
/**
 * Creates string entries or returns the ith entry. Used for performance tests.
 *
 * Rem.: Generated entries are like this: "k0", "k1", ... "k<i-1>"
 * NOTE(review): despite the "_int" suffix this stores strings - confirm the intended use.
 *
 * @param i When "create" is false, we return the ith entry (does not check OOB)
 * @param create When true, we (re)initialize the store with i generated entries.
 * @returns The ith entry as a C string when create==false, otherwise a null pointer.
 */
inline const char *datastore_int(int i, bool create = false) noexcept {
    /* Per-thread storage shared by the "create" and "lookup" modes. */
    static thread_local std::vector<std::string> keys;

    if(!create) {
        /* Lookup mode: the pointer stays valid until the store is re-created. */
        return keys[i].c_str();
    }

    /* Create mode: drop the old entries and regenerate them. */
    keys.clear();
    if(i > 0) {
        /* Reserve once up front so the fill loop below never reallocates. */
        keys.reserve(static_cast<std::size_t>(i));
    }
    const std::string prefix = "k";
    for(int j = 0; j < i; ++j) {
        keys.push_back(prefix + std::to_string(j));
    }
    /* The return value carries no meaning in create mode. */
    return nullptr;
}
/**
* Creates datas or returns the ith data. Used for performance tests.
* *
* @param i When "create" is false, we return the ith data (does not check OOB) * @param i When "create" is false, we return the ith data (does not check OOB)
* @param create When true, we initialize the datastore with datas generated from 0..i indices. * @param create When true, we initialize the datastore with datas generated from 0..i indices.
@ -124,7 +163,7 @@ void test_basics(amap mapdo, void *map) {
assert(*iptr == 3); assert(*iptr == 3);
} }
void test_stringmaps() { void test_stringmaps(int perf_test_i) {
/* Basic tests */ /* Basic tests */
simap_instance si = simap_create(); simap_instance si = simap_create();
test_basics(simap, &si); test_basics(simap, &si);
@ -136,12 +175,7 @@ void test_stringmaps() {
test_basics(unomap, &umi); test_basics(unomap, &umi);
/* Performance tests */ /* Performance tests */
int i = 100; int i = perf_test_i;
keystore(i, true);
datastore(i, true);
puts("Performance testing stringmaps:");
puts("");
test_perf(mapmap, &mi, i, "std::map"); test_perf(mapmap, &mi, i, "std::map");
test_perf(simap, &si, i, "simap"); test_perf(simap, &si, i, "simap");
test_perf(unomap, &umi, i, "std::unordered_map"); test_perf(unomap, &umi, i, "std::unordered_map");
@ -214,14 +248,85 @@ void test_simd_map_basics() {
simd_map_free(&smap); simd_map_free(&smap);
} }
// Times max_key insertions into a simd_map and prints the elapsed time in ms.
// Keys are read through int_keystore(i), values through datastore(i).
void test_intmaps() { void test_simd_map_perf(int max_key) {
// Report which SIMD code path was compiled in (prints nothing if neither macro is set).
#ifdef __AVX2__
puts("...Perf testing simd_map with AVX2...");
#elif __SSE2__
puts("...Perf testing simd_map with SSE2...");
#endif
// XXX: Creating the map like this would measure the wrong thing:
// simd_map smap = simd_map_create();
// Why? The first allocation would then be part of the timed insertions,
// so we start from a map that already has memory reserved.
simd_map smap = simd_map_create_and_reserve();
// Timed section: only the insertion loop is measured.
auto begin = std::chrono::high_resolution_clock::now();
for(int i = 0; i < max_key; ++i) {
int *key = int_keystore(i);
int *data = datastore(i);
simd_map_set(&smap, *key, *data);
}
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
// Nanoseconds are converted to milliseconds (1e-6) for printing.
printf("Insertion time for %d elements (simd_map): %.3f ms.\n", max_key, elapsed.count() * 1e-6);
simd_map_free(&smap);
}
/** Times max_key insertions into a std::unordered_map<int,int> and prints the result in ms. */
void test_int_unordered_map(int max_key) {
    using timer_clock = std::chrono::high_resolution_clock;
    std::unordered_map<int, int> table;

    /* Timed section: only the insertion loop is measured. */
    const auto started = timer_clock::now();
    int idx = 0;
    while(idx < max_key) {
        int *k = int_keystore(idx);
        int *v = datastore(idx);
        table[*k] = *v;
        ++idx;
    }
    const auto finished = timer_clock::now();

    /* Nanoseconds are converted to milliseconds (1e-6) for printing. */
    const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(finished - started);
    const double millis = nanos.count() * 1e-6;
    printf("Insertion time for %d elements (std::unordered_map<int,int>): %.3f ms.\n", max_key, millis);
}
/** Times max_key insertions into a std::map<int,int> and prints the result in ms. */
void test_int_std_map(int max_key) {
    using timer_clock = std::chrono::high_resolution_clock;
    std::map<int, int> tree;

    /* Timed section: only the insertion loop is measured. */
    const auto started = timer_clock::now();
    int idx = 0;
    while(idx < max_key) {
        int *k = int_keystore(idx);
        int *v = datastore(idx);
        tree[*k] = *v;
        ++idx;
    }
    const auto finished = timer_clock::now();

    /* Nanoseconds are converted to milliseconds (1e-6) for printing. */
    const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(finished - started);
    const double millis = nanos.count() * 1e-6;
    printf("Insertion time for %d elements (std::map<int,int>): %.3f ms.\n", max_key, millis);
}
void test_intmaps(int perf_test_i) {
/* Basic tests */ /* Basic tests */
test_simd_map_basics(); // test_simd_map_basics();
test_int_std_map(perf_test_i);
test_int_unordered_map(perf_test_i);
test_simd_map_perf(perf_test_i);
} }
int main() { int main() {
test_intmaps(); int perf_test_i = 100;
test_stringmaps();
/* Prepare data stores */
keystore(perf_test_i, true);
int_keystore(perf_test_i, true);
datastore(perf_test_i, true);
/* Tests */
puts("");
puts("Integer maps...");
puts("");
test_intmaps(perf_test_i);
puts("");
puts("String maps...");
puts("");
test_stringmaps(perf_test_i);
puts("");
puts("...done!");
return 0; return 0;
} }

View File

@ -5,6 +5,8 @@ release:
debug-avx2: debug-avx2:
g++ main.cpp -g -mavx2 -Wall -o main g++ main.cpp -g -mavx2 -Wall -o main
release-avx2: release-avx2:
g++ main.cpp -mavx2 -O3 -Wall -o main g++ main.cpp -mavx2 -O2 -Wall -o main
release-avx2-debug:
g++ main.cpp -g -mavx2 -O2 -Wall -o main
release-avx2-asm: release-avx2-asm:
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O2 -Wall -o main

View File

@ -63,6 +63,9 @@ static inline SM_ALWAYS_INLINE simd_map simd_map_create() {
return ret; return ret;
} }
/** Creates a simd map instance and pre-reserves space for a few elements (forward declaration). */
static inline SM_ALWAYS_INLINE simd_map simd_map_create_and_reserve();
/** Free all resources held by the map. Returns 0 on errors. */ /** Free all resources held by the map. Returns 0 on errors. */
static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) { static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) {
return freearena(&(map->a)); return freearena(&(map->a));
@ -103,16 +106,16 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
/* The 's' means "single" (float precision), and mask will have [0..7] bits set! */ /* The 's' means "single" (float precision), and mask will have [0..7] bits set! */
uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m); uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m);
if(SM_UNLIKELY(mask != 0)) { if(SM_UNLIKELY(mask != 0)) {
int ipc = __builtin_popcount(mask);
/* 00000000 00000000 00000000 01000100 -> 6 */ /* 00000000 00000000 00000000 01000100 -> 6 */
int i = (31 - __builtin_clz(mask)); int i = (31 - __builtin_clz(mask));
uint32_t *ptr = &values[i]; uint32_t *ptr = &values[i];
if(SM_LIKELY(ipc == 1) || i >= lane_begin) { if(SM_LIKELY(lane_begin == 0)) {
/* Only one match in the lane OR first matching in find/find_all */ /* Fast-path: Only one match per lane OR first matching in lane for this find/find_all */
*lane_next_begin = (i + 1) % SM_LANE_SPAN; *lane_next_begin = (i + 1) % SM_LANE_SPAN;
return ptr; return ptr;
} else { } else {
/* We did a find_all(..) AND there is more than one match in the lane /* We did a find_all(..) AND there is more than one match in the lane
* and its not first find_all(..) on the lane in question...
* *
* This might be suboptimal, but not so bad: * This might be suboptimal, but not so bad:
* *
@ -123,7 +126,6 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
* *
* I guess its fine as it should happen statistically rarely anyways * I guess its fine as it should happen statistically rarely anyways
*/ */
/* TODO: Can this be solved more optimal by specialized function? */
goto non_simd_modulo; goto non_simd_modulo;
} }
} }
@ -373,4 +375,12 @@ static inline int simd_map_remove(simd_map *map, uint32_t key) {
return 0; return 0;
} }
/**
 * Creates a simd map instance that already has space reserved for a few elements.
 *
 * Done by storing one dummy entry and then erasing the map again.
 *
 * @returns The created map; per the erase step it is reset but keeps its memory.
 */
static inline SM_ALWAYS_INLINE simd_map simd_map_create_and_reserve() {
    simd_map warmed = simd_map_create();
    /* Dummy entry - presumably this is what triggers the first allocation */
    simd_map_set(&warmed, 42, 42);
    /* Resets the map, but keeps memory reserved! */
    simd_map_erase(&warmed);
    return warmed;
}
#endif #endif