simd-map multimap search fix in the AVX2 code

This commit is contained in:
Richard Thier 2024-10-23 18:06:01 +02:00
parent 41988a0dee
commit 968f95734d
2 changed files with 5 additions and 6 deletions

View File

@ -5,6 +5,6 @@ release:
debug-avx2:
g++ main.cpp -g -mavx2 -Wall -o main
release-avx2:
g++ main.cpp -mavx2 -O3 -Wall -o main
g++ main.cpp -mavx2 -O2 -Wall -o main
release-avx2-asm:
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O2 -Wall -o main

View File

@ -103,16 +103,16 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
/* The 's' means "single" (float precision), so only bits [0..7] of the mask can be set (one per lane element)! */
uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m);
if(SM_UNLIKELY(mask != 0)) {
int ipc = __builtin_popcount(mask);
/* 00000000 00000000 00000000 01000100 -> 6 */
int i = (31 - __builtin_clz(mask));
uint32_t *ptr = &values[i];
if(SM_LIKELY(ipc == 1) || i >= lane_begin) {
/* Only one match in the lane OR first matching in find/find_all */
if(SM_LIKELY(lane_begin == 0)) {
/* Fast-path: Only one match per lane OR first matching in lane for this find/find_all */
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
return ptr;
} else {
/* We did a find_all(..) AND there is more than one match in the lane
* and it's not the first find_all(..) on the lane in question...
*
* This might be suboptimal, but not so bad:
*
@ -123,7 +123,6 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
*
* I guess it's fine, as it should statistically happen rarely anyway
*/
/* TODO: Can this be solved more optimally by a specialized function? */
goto non_simd_modulo;
}
}