From 5ecb48815b57c51527f2c55c3555fb40ffe48f6b Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Wed, 1 Oct 2025 01:53:28 +0200 Subject: [PATCH] tpbx: tried removing relative addressing, but it does not help; it just makes n an int instead of uint32_t, so this will probably be reverted. Sad, because this actually looked beneficial --- threepass_xbit.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/threepass_xbit.h b/threepass_xbit.h index da05422..2869779 100644 --- a/threepass_xbit.h +++ b/threepass_xbit.h @@ -96,11 +96,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { // Bottom digit a->buf // right-to-left to ensure already sorted digits order we keep for iterations #pragma GCC unroll 48 - for(uint32_t i = n; i > 0; --i) { + for(int i = n - 1; i >= 0; --i) { // Prefetch caches //__builtin_prefetch(&a[i-8]); // Get num and its new offset / location - auto num = a[i - 1]; + auto num = a[i]; auto bkeyni = (num >> shr3) & mask3; auto offset = --bucket3[bkeyni]; @@ -114,11 +114,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { // Mid digit buf->a // right-to-left to ensure already sorted digits order we keep for iterations #pragma GCC unroll 48 - for(uint32_t i = n; i > 0; --i) { + for(int i = n - 1; i >= 0; --i) { // Prefetch caches //__builtin_prefetch(&buf[i-8]); // Get num and its new offset / location - auto num = buf[i - 1]; + auto num = buf[i]; auto bkeyni = (num >> shr2) & mask2; auto offset = --bucket2[bkeyni]; @@ -132,11 +132,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { // Top digit a->buf // right-to-left to ensure already sorted digits order we keep for iterations #pragma GCC unroll 48 - for(uint32_t i = n; i > 0; --i) { + for(int i = n - 1; i >= 0; --i) { // Prefetch caches // __builtin_prefetch(&a[i-16]); // Get num and its new offset / location - auto num = a[i - 1]; + auto num = a[i]; auto bkeyni = (num >> shr1) & mask1; 
auto offset = --bucket1[bkeyni];