From 2aa7de0d40bca5c8713c5edb41a2ff3995a2ea01 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Thu, 2 Oct 2025 08:09:27 +0200 Subject: [PATCH] tiny (hopefully) optimization for tpxb --- threepass_xbit.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/threepass_xbit.h b/threepass_xbit.h index 5c87d27..f388db6 100644 --- a/threepass_xbit.h +++ b/threepass_xbit.h @@ -94,27 +94,27 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { /* right-to-left to ensure already sorted digits order we keep for iterations */ /* Bottom digit a->buf */ - #pragma GCC unroll 80 - for(uint32_t i = n; i > 0; --i) { - uint32_t num = a[i - 1]; + #pragma GCC unroll 64 + for(int i = n - 1; i >= 0; --i) { + uint32_t num = a[i]; /* save rotated into the masked, bucketed loc */ buf[bucket3[num & mask3]--] = (num >> TPBX3) | (num << (32 - TPBX3)); //__builtin_prefetch(&buf[bucket3[num & mask3] - 2]); } /* Mid digit buf->a */ - #pragma GCC unroll 80 - for(uint32_t i = n; i > 0; --i) { - uint32_t num = buf[i - 1]; + #pragma GCC unroll 64 + for(int i = n - 1; i >= 0; --i) { + uint32_t num = buf[i]; /* save rotated into the masked, bucketed loc */ a[bucket2[num & mask2]--] = (num >> TPBX2) | (num << (32 - TPBX2)); //__builtin_prefetch(&a[bucket2[num & mask2] - 2]); } /* Top digit a->buf */ - #pragma GCC unroll 80 - for(uint32_t i = n; i > 0; --i) { - uint32_t num = a[i - 1]; + #pragma GCC unroll 64 + for(int i = n - 1; i >= 0; --i) { + uint32_t num = a[i]; /* Reconstruct the original key in this element by where its stuff is stored */ constexpr int rot = TPBX1 + (32 - TPBX1 - TPBX2 - TPBX3); /* save rotated into the masked, bucketed loc */