From 968f95734dea8418b28e01aa1c6e250185f9ad64 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Wed, 23 Oct 2024 18:06:01 +0200 Subject: [PATCH] simd-map multimap search fix in the AVX2 code --- makefile | 4 ++-- simd_map.h | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/makefile b/makefile index a60cf41..e0aa3eb 100644 --- a/makefile +++ b/makefile @@ -5,6 +5,6 @@ release: debug-avx2: g++ main.cpp -g -mavx2 -Wall -o main release-avx2: - g++ main.cpp -mavx2 -O3 -Wall -o main + g++ main.cpp -mavx2 -O2 -Wall -o main release-avx2-asm: - g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main + g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O2 -Wall -o main diff --git a/simd_map.h b/simd_map.h index a63c2af..117f452 100644 --- a/simd_map.h +++ b/simd_map.h @@ -103,16 +103,16 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find( /* The 's' means "single" (float precision), and mask will have [0..7] bits set! */ uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m); if(SM_UNLIKELY(mask != 0)) { - int ipc = __builtin_popcount(mask); /* 00000000 00000000 00000000 01000100 -> 6 */ int i = (31 - __builtin_clz(mask)); uint32_t *ptr = &values[i]; - if(SM_LIKELY(ipc == 1) || i >= lane_begin) { - /* Only one match in the lane OR first matching in find/find_all */ + if(SM_LIKELY(lane_begin == 0)) { + /* Fast-path: Only one match per lane OR first match in the lane for this find/find_all */ *lane_next_begin = (i + 1) % SM_LANE_SPAN; return ptr; } else { /* We did a find_all(..) AND there is more than one match in the lane + * and it's not the first find_all(..) on the lane in question... * * This might be suboptimal, but not so bad: * @@ -123,7 +123,6 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find( * * I guess its fine as it should happen statistically rarely anyways */ - /* TODO: Can this be solved more optimal by specialized function? */ goto non_simd_modulo; } }