From 22ec0301164579f5dc857aa49f065bc097971b82 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Wed, 1 Oct 2025 01:00:07 +0200 Subject: [PATCH] thier3: micro-optimized some of the unrolls --- thiersort3.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/thiersort3.h b/thiersort3.h index c3f4328..383d430 100644 --- a/thiersort3.h +++ b/thiersort3.h @@ -71,7 +71,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { for(int i = 0; i < 256; ++i) { bucket[i] = 0; } - #pragma GCC unroll 64 + #pragma GCC unroll 128 for(int i = 0; i < n; ++i) { ++bucket[witch_bucket3(arr[i])]; } @@ -105,7 +105,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { #ifdef NO_EXTRA_BIT /* Move to the buckets */ /* Rem.: This also changes bucket[i] so they will point to bucket beginnings */ - #pragma GCC unroll 64 + #pragma GCC unroll 128 for(int i = 0; i < n; ++i) { uint32_t num = arr[i]; uint32_t witch = witch_bucket3(num); @@ -114,7 +114,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { } /* temparr -> arr each bucket and sort them in-place */ - #pragma GCC unroll 64 + #pragma GCC unroll 2 for(int b = 0; b < 256; ++b) { int begin = bucket[b]; int end = bucket_end[b]; @@ -148,7 +148,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { } /* temparr -> arr each bucket and sort them in-place */ - #pragma GCC unroll 64 + #pragma GCC unroll 2 for(int b = 0; b < 256; ++b) { assert(bucket_leftend[b] == bucket[b]); int lbegin = bucket_left[b];