diff --git a/threepass_xbit.h b/threepass_xbit.h index da05422..2869779 100644 --- a/threepass_xbit.h +++ b/threepass_xbit.h @@ -96,11 +96,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { // Bottom digit a->buf // right-to-left to ensure already sorted digits order we keep for iterations #pragma GCC unroll 48 - for(uint32_t i = n; i > 0; --i) { + for(int i = n - 1; i >= 0; --i) { // Prefetch caches //__builtin_prefetch(&a[i-8]); // Get num and its new offset / location - auto num = a[i - 1]; + auto num = a[i]; auto bkeyni = (num >> shr3) & mask3; auto offset = --bucket3[bkeyni]; @@ -114,11 +114,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { // Mid digit buf->a // right-to-left to ensure already sorted digits order we keep for iterations #pragma GCC unroll 48 - for(uint32_t i = n; i > 0; --i) { + for(int i = n - 1; i >= 0; --i) { // Prefetch caches //__builtin_prefetch(&buf[i-8]); // Get num and its new offset / location - auto num = buf[i - 1]; + auto num = buf[i]; auto bkeyni = (num >> shr2) & mask2; auto offset = --bucket2[bkeyni]; @@ -132,11 +132,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { // Top digit a->buf // right-to-left to ensure already sorted digits order we keep for iterations #pragma GCC unroll 48 - for(uint32_t i = n; i > 0; --i) { + for(int i = n - 1; i >= 0; --i) { // Prefetch caches // __builtin_prefetch(&a[i-16]); // Get num and its new offset / location - auto num = a[i - 1]; + auto num = a[i]; auto bkeyni = (num >> shr1) & mask1; auto offset = --bucket1[bkeyni];