thier3: micro-optimized some of the unrolls

This commit is contained in:
Richard Thier 2025-10-01 01:00:07 +02:00
parent 69d1432721
commit 22ec030116

View File

@@ -71,7 +71,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
for(int i = 0; i < 256; ++i) { for(int i = 0; i < 256; ++i) {
bucket[i] = 0; bucket[i] = 0;
} }
#pragma GCC unroll 64 #pragma GCC unroll 128
for(int i = 0; i < n; ++i) { for(int i = 0; i < n; ++i) {
++bucket[witch_bucket3(arr[i])]; ++bucket[witch_bucket3(arr[i])];
} }
@@ -105,7 +105,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
#ifdef NO_EXTRA_BIT #ifdef NO_EXTRA_BIT
/* Move to the buckets */ /* Move to the buckets */
/* Rem.: This also changes bucket[i] so they will point to bucket beginnings */ /* Rem.: This also changes bucket[i] so they will point to bucket beginnings */
#pragma GCC unroll 64 #pragma GCC unroll 128
for(int i = 0; i < n; ++i) { for(int i = 0; i < n; ++i) {
uint32_t num = arr[i]; uint32_t num = arr[i];
uint32_t witch = witch_bucket3(num); uint32_t witch = witch_bucket3(num);
@@ -114,7 +114,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
} }
/* temparr -> arr each bucket and sort them in-place */ /* temparr -> arr each bucket and sort them in-place */
#pragma GCC unroll 64 #pragma GCC unroll 2
for(int b = 0; b < 256; ++b) { for(int b = 0; b < 256; ++b) {
int begin = bucket[b]; int begin = bucket[b];
int end = bucket_end[b]; int end = bucket_end[b];
@@ -148,7 +148,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
} }
/* temparr -> arr each bucket and sort them in-place */ /* temparr -> arr each bucket and sort them in-place */
#pragma GCC unroll 64 #pragma GCC unroll 2
for(int b = 0; b < 256; ++b) { for(int b = 0; b < 256; ++b) {
assert(bucket_leftend[b] == bucket[b]); assert(bucket_leftend[b] == bucket[b]);
int lbegin = bucket_left[b]; int lbegin = bucket_left[b];