Compare commits
2 Commits
41988a0dee
...
e82468acf8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e82468acf8 | ||
|
|
968f95734d |
129
main.cpp
129
main.cpp
@ -33,7 +33,46 @@ inline const char *keystore(int i, bool create = false) noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates keys or returns the ith key. Used for performance tests.
|
* Creates keys or returns the ith integer key. Used for performance tests.
|
||||||
|
*
|
||||||
|
* Rem.: Generated keys are like this: i, i-1, i-2, ... 1
|
||||||
|
*
|
||||||
|
* @param i When "create" is false, we return the ith key (does not check OOB)
|
||||||
|
* @param create When true, we initialize the keystore with keys generated from 0..i indices.
|
||||||
|
* @returns The ith key when create==false, otherwise undefined.
|
||||||
|
*/
|
||||||
|
inline int *int_keystore(int i, bool create = false) noexcept {
	// Per-thread backing storage. Pointers handed out in lookup mode refer to
	// elements of this vector, so they are only meaningful until the next
	// call with create == true on the same thread.
	static thread_local std::vector<int> keys;

	if(!create) {
		// Lookup mode: no bounds checking, the caller must pass a valid index.
		return &(keys[i]);
	}

	// Create mode: drop the old keys and regenerate i, i-1, ..., 1.
	keys.clear();
	if(i > 0) {
		// Reserve the final size up-front so filling the store does not
		// reallocate repeatedly (the former reserve(0) was a no-op).
		keys.reserve(static_cast<std::vector<int>::size_type>(i));
	}
	for(int j = 0; j < i; ++j) {
		keys.push_back(i - j);
	}

	// Nothing meaningful to return when (re)creating the store.
	return nullptr;
}
|
||||||
|
inline const char *datastore_int(int i, bool create = false) noexcept {
	// Per-thread storage of generated strings "k0", "k1", ..., "k<i-1>".
	// Returned C strings point into these std::string objects, so they are
	// only meaningful until the next call with create == true on this thread.
	static thread_local std::vector<std::string> keys;

	if(!create) {
		// Lookup mode: no bounds checking, the caller must pass a valid index.
		return keys[i].c_str();
	}

	// Create mode: drop the old strings and regenerate them for indices 0..i-1.
	keys.clear();
	if(i > 0) {
		// Reserve the final size up-front so filling the store does not
		// reallocate repeatedly (the former reserve(0) was a no-op).
		keys.reserve(static_cast<std::vector<std::string>::size_type>(i));
	}
	for(int j = 0; j < i; ++j) {
		keys.push_back("k" + std::to_string(j));
	}

	// Nothing meaningful to return when (re)creating the store.
	return nullptr;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates datas or returns the ith data. Used for performance tests.
|
||||||
*
|
*
|
||||||
* @param i When "create" is false, we return the ith data (does not check OOB)
|
* @param i When "create" is false, we return the ith data (does not check OOB)
|
||||||
* @param create When true, we initialize the datastore with datas generated from 0..i indices.
|
* @param create When true, we initialize the datastore with datas generated from 0..i indices.
|
||||||
@ -124,7 +163,7 @@ void test_basics(amap mapdo, void *map) {
|
|||||||
assert(*iptr == 3);
|
assert(*iptr == 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_stringmaps() {
|
void test_stringmaps(int perf_test_i) {
|
||||||
/* Basic tests */
|
/* Basic tests */
|
||||||
simap_instance si = simap_create();
|
simap_instance si = simap_create();
|
||||||
test_basics(simap, &si);
|
test_basics(simap, &si);
|
||||||
@ -136,12 +175,7 @@ void test_stringmaps() {
|
|||||||
test_basics(unomap, &umi);
|
test_basics(unomap, &umi);
|
||||||
|
|
||||||
/* Performance tests */
|
/* Performance tests */
|
||||||
int i = 100;
|
int i = perf_test_i;
|
||||||
keystore(i, true);
|
|
||||||
datastore(i, true);
|
|
||||||
|
|
||||||
puts("Performance testing stringmaps:");
|
|
||||||
puts("");
|
|
||||||
test_perf(mapmap, &mi, i, "std::map");
|
test_perf(mapmap, &mi, i, "std::map");
|
||||||
test_perf(simap, &si, i, "simap");
|
test_perf(simap, &si, i, "simap");
|
||||||
test_perf(unomap, &umi, i, "std::unordered_map");
|
test_perf(unomap, &umi, i, "std::unordered_map");
|
||||||
@ -214,14 +248,85 @@ void test_simd_map_basics() {
|
|||||||
simd_map_free(&smap);
|
simd_map_free(&smap);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_intmaps() {
|
void test_simd_map_perf(int max_key) {
|
||||||
|
#ifdef __AVX2__
|
||||||
|
puts("...Perf testing simd_map with AVX2...");
|
||||||
|
#elif __SSE2__
|
||||||
|
puts("...Perf testing simd_map with SSE2...");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// XXX: This way we would measure the wrong thing:
|
||||||
|
// simd_map smap = simd_map_create();
|
||||||
|
// Why? To measure the right thing, not the first allocation!
|
||||||
|
simd_map smap = simd_map_create_and_reserve();
|
||||||
|
auto begin = std::chrono::high_resolution_clock::now();
|
||||||
|
for(int i = 0; i < max_key; ++i) {
|
||||||
|
int *key = int_keystore(i);
|
||||||
|
int *data = datastore(i);
|
||||||
|
simd_map_set(&smap, *key, *data);
|
||||||
|
}
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||||
|
|
||||||
|
printf("Insertion time for %d elements (simd_map): %.3f ms.\n", max_key, elapsed.count() * 1e-6);
|
||||||
|
simd_map_free(&smap);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_int_unordered_map(int max_key) {
|
||||||
|
std::unordered_map<int, int> smap;
|
||||||
|
auto begin = std::chrono::high_resolution_clock::now();
|
||||||
|
for(int i = 0; i < max_key; ++i) {
|
||||||
|
int *key = int_keystore(i);
|
||||||
|
int *data = datastore(i);
|
||||||
|
smap[*key] = *data;
|
||||||
|
}
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||||
|
|
||||||
|
printf("Insertion time for %d elements (std::unordered_map<int,int>): %.3f ms.\n", max_key, elapsed.count() * 1e-6);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_int_std_map(int max_key) {
|
||||||
|
std::map<int, int> smap;
|
||||||
|
auto begin = std::chrono::high_resolution_clock::now();
|
||||||
|
for(int i = 0; i < max_key; ++i) {
|
||||||
|
int *key = int_keystore(i);
|
||||||
|
int *data = datastore(i);
|
||||||
|
smap[*key] = *data;
|
||||||
|
}
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||||
|
|
||||||
|
printf("Insertion time for %d elements (std::map<int,int>): %.3f ms.\n", max_key, elapsed.count() * 1e-6);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_intmaps(int perf_test_i) {
|
||||||
/* Basic tests */
|
/* Basic tests */
|
||||||
test_simd_map_basics();
|
// test_simd_map_basics();
|
||||||
|
test_int_std_map(perf_test_i);
|
||||||
|
test_int_unordered_map(perf_test_i);
|
||||||
|
test_simd_map_perf(perf_test_i);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
test_intmaps();
|
int perf_test_i = 100;
|
||||||
test_stringmaps();
|
|
||||||
|
/* Prepare data stores */
|
||||||
|
keystore(perf_test_i, true);
|
||||||
|
int_keystore(perf_test_i, true);
|
||||||
|
datastore(perf_test_i, true);
|
||||||
|
|
||||||
|
/* Tests */
|
||||||
|
puts("");
|
||||||
|
puts("Integer maps...");
|
||||||
|
puts("");
|
||||||
|
test_intmaps(perf_test_i);
|
||||||
|
puts("");
|
||||||
|
puts("String maps...");
|
||||||
|
puts("");
|
||||||
|
test_stringmaps(perf_test_i);
|
||||||
|
puts("");
|
||||||
|
puts("...done!");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
6
makefile
6
makefile
@ -5,6 +5,8 @@ release:
|
|||||||
debug-avx2:
|
debug-avx2:
|
||||||
g++ main.cpp -g -mavx2 -Wall -o main
|
g++ main.cpp -g -mavx2 -Wall -o main
|
||||||
release-avx2:
|
release-avx2:
|
||||||
g++ main.cpp -mavx2 -O3 -Wall -o main
|
g++ main.cpp -mavx2 -O2 -Wall -o main
|
||||||
|
release-avx2-debug:
|
||||||
|
g++ main.cpp -g -mavx2 -O2 -Wall -o main
|
||||||
release-avx2-asm:
|
release-avx2-asm:
|
||||||
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main
|
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O2 -Wall -o main
|
||||||
|
|||||||
18
simd_map.h
18
simd_map.h
@ -63,6 +63,9 @@ static inline SM_ALWAYS_INLINE simd_map simd_map_create() {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Creates a simd map instance and pre-reserves space for a few elements. */
|
||||||
|
static inline SM_ALWAYS_INLINE simd_map simd_map_create_and_reserve();
|
||||||
|
|
||||||
/** Free all resources held by the map. Returns 0 on errors. */
|
/** Free all resources held by the map. Returns 0 on errors. */
|
||||||
static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) {
|
static inline SM_ALWAYS_INLINE char simd_map_free(simd_map *map) {
|
||||||
return freearena(&(map->a));
|
return freearena(&(map->a));
|
||||||
@ -103,16 +106,16 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
|
|||||||
/* The 's' means "single" (float precision), and mask will have [0..7] bits set! */
|
/* The 's' means "single" (float precision), and mask will have [0..7] bits set! */
|
||||||
uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m);
|
uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m);
|
||||||
if(SM_UNLIKELY(mask != 0)) {
|
if(SM_UNLIKELY(mask != 0)) {
|
||||||
int ipc = __builtin_popcount(mask);
|
|
||||||
/* 00000000 00000000 00000000 01000100 -> 6 */
|
/* 00000000 00000000 00000000 01000100 -> 6 */
|
||||||
int i = (31 - __builtin_clz(mask));
|
int i = (31 - __builtin_clz(mask));
|
||||||
uint32_t *ptr = &values[i];
|
uint32_t *ptr = &values[i];
|
||||||
if(SM_LIKELY(ipc == 1) || i >= lane_begin) {
|
if(SM_LIKELY(lane_begin == 0)) {
|
||||||
/* Only one match in the lane OR first matching in find/find_all */
|
/* Fast-path: Only one match per lane OR first matching in lane for this find/find_all */
|
||||||
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
|
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
|
||||||
return ptr;
|
return ptr;
|
||||||
} else {
|
} else {
|
||||||
/* We did a find_all(..) AND there is more than one match in the lane
|
/* We did a find_all(..) AND there is more than one match in the lane
|
||||||
|
* and it's not the first find_all(..) on the lane in question...
|
||||||
*
|
*
|
||||||
* This might be suboptimal, but not so bad:
|
* This might be suboptimal, but not so bad:
|
||||||
*
|
*
|
||||||
@ -123,7 +126,6 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
|
|||||||
*
|
*
|
||||||
* I guess its fine as it should happen statistically rarely anyways
|
* I guess its fine as it should happen statistically rarely anyways
|
||||||
*/
|
*/
|
||||||
/* TODO: Can this be solved more optimal by specialized function? */
|
|
||||||
goto non_simd_modulo;
|
goto non_simd_modulo;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -373,4 +375,12 @@ static inline int simd_map_remove(simd_map *map, uint32_t key) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Creates a simd map instance and pre-reserves space for a few elements. */
|
||||||
|
static inline SM_ALWAYS_INLINE simd_map simd_map_create_and_reserve() {
	simd_map warmed = simd_map_create();

	/* Store one throwaway entry, then erase the map again: the erase resets
	 * the map but keeps its memory reserved, so the caller receives an empty
	 * map that already has storage behind it. */
	simd_map_set(&warmed, 42, 42);
	simd_map_erase(&warmed);

	return warmed;
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user