simd-map multimap search fix in the AVX2 code
This commit is contained in:
parent
41988a0dee
commit
968f95734d
4
makefile
4
makefile
@ -5,6 +5,6 @@ release:
|
|||||||
debug-avx2:
|
debug-avx2:
|
||||||
g++ main.cpp -g -mavx2 -Wall -o main
|
g++ main.cpp -g -mavx2 -Wall -o main
|
||||||
release-avx2:
|
release-avx2:
|
||||||
g++ main.cpp -mavx2 -O3 -Wall -o main
|
g++ main.cpp -mavx2 -O2 -Wall -o main
|
||||||
release-avx2-asm:
|
release-avx2-asm:
|
||||||
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O3 -Wall -o main
|
g++ main.cpp -S -fopt-info-vec-missed -masm=intel -mavx2 -O2 -Wall -o main
|
||||||
|
@ -103,16 +103,16 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
|
|||||||
/* The 's' means "single" (float precision), and mask will have [0..7] bits set! */
|
/* The 's' means "single" (float precision), and mask will have [0..7] bits set! */
|
||||||
uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m);
|
uint32_t mask = (uint32_t) _mm256_movemask_ps((__m256) m);
|
||||||
if(SM_UNLIKELY(mask != 0)) {
|
if(SM_UNLIKELY(mask != 0)) {
|
||||||
int ipc = __builtin_popcount(mask);
|
|
||||||
/* 00000000 00000000 00000000 01000100 -> 6 */
|
/* 00000000 00000000 00000000 01000100 -> 6 */
|
||||||
int i = (31 - __builtin_clz(mask));
|
int i = (31 - __builtin_clz(mask));
|
||||||
uint32_t *ptr = &values[i];
|
uint32_t *ptr = &values[i];
|
||||||
if(SM_LIKELY(ipc == 1) || i >= lane_begin) {
|
if(SM_LIKELY(lane_begin == 0)) {
|
||||||
/* Only one match in the lane OR first matching in find/find_all */
|
/* Fast-path: Only one match per lane OR first match in the lane for this find/find_all */
|
||||||
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
|
*lane_next_begin = (i + 1) % SM_LANE_SPAN;
|
||||||
return ptr;
|
return ptr;
|
||||||
} else {
|
} else {
|
||||||
/* We did a find_all(..) AND there is more than one match in the lane
|
/* We did a find_all(..) AND there is more than one match in the lane
|
||||||
|
* and it's not the first find_all(..) on the lane in question...
|
||||||
*
|
*
|
||||||
* This might be suboptimal, but not so bad:
|
* This might be suboptimal, but not so bad:
|
||||||
*
|
*
|
||||||
@ -123,7 +123,6 @@ static inline SM_ALWAYS_INLINE uint32_t *simd_map_lane_find(
|
|||||||
*
|
*
|
||||||
* I guess it's fine, as it should statistically happen rarely anyway
|
* I guess it's fine, as it should statistically happen rarely anyway
|
||||||
*/
|
*/
|
||||||
/* TODO: Can this be solved more optimal by specialized function? */
|
|
||||||
goto non_simd_modulo;
|
goto non_simd_modulo;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user