From 5a8f34efa01387c133152c61a49f4f9d4dd65b42 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Fri, 12 Sep 2025 01:42:11 +0200 Subject: [PATCH] fixed thiersort2 --- thiersort.h | 1 + thiersort2.h | 21 ++++++++++++--------- ypsu.cpp | 10 ++++++---- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/thiersort.h b/thiersort.h index 9e4d5c7..3dae1fd 100644 --- a/thiersort.h +++ b/thiersort.h @@ -531,6 +531,7 @@ static inline TSU8 ts_radixi( /* Should never happen */ assert(false); + return 0; } /* Forward decl. */ diff --git a/thiersort2.h b/thiersort2.h index 66f0f0d..6446d9b 100644 --- a/thiersort2.h +++ b/thiersort2.h @@ -46,24 +46,25 @@ static inline void thiersort2(uint32_t *arr, uint32_t *temparr, int n, sch_rand_ } /* Count */ - #pragma GCC unroll 4 + #pragma GCC unroll 64 for(int i = 0; i < 256; ++i) { bucket[i] = 0; } - #pragma GCC unroll 4 + #pragma GCC unroll 64 for(int i = 0; i < n; ++i) { ++bucket[witch_bucket(arr[i])]; } /* Prefix sum (like in Magyarsort) */ uint32_t prev = 0; + #pragma GCC unroll 4 for (int i = 0; i < 256; i++) { bucket[i] += prev; prev = bucket[i]; } /* Save end-offsets */ - #pragma GCC unroll 4 + #pragma GCC unroll 64 for(int i = 0; i < 256; ++i) { bucket_end[i] = bucket[i]; } @@ -98,13 +99,13 @@ static inline void thiersort2(uint32_t *arr, uint32_t *temparr, int n, sch_rand_ uint32_t pivot = temparr[i]; #pragma GCC unroll 4 for(int j = begin + 1; j < end; ++j) { - if(temparr[j] == b) { + if(temparr[j] == pivot) { /* swap to front partition */ ++i; uint32_t tmp = temparr[i]; temparr[i] = temparr[j]; temparr[j] = tmp; - } else if(temparr[j] < b) { + } else if(temparr[j] < pivot) { /* copy to left */ arr[smalli++] = temparr[j]; } else { @@ -113,13 +114,15 @@ static inline void thiersort2(uint32_t *arr, uint32_t *temparr, int n, sch_rand_ } } /* Copy the mid elements back */ - #pragma GCC unroll 4 + int target = smalli; + #pragma GCC unroll 64 for(int j = begin; j < i + 1; ++j) { - arr[smalli++] = temparr[j]; + 
arr[target++] = temparr[j]; } - /* Call schwabsort */ - schwab_sort(arr, begin, end - 1, rstate); + /* Call schwabsort - only to [begin..smalli) and (biggi..end) */ + schwab_sort(arr, begin, smalli - 1, rstate); + schwab_sort(arr, biggi + 1, end - 1, rstate); } } diff --git a/ypsu.cpp b/ypsu.cpp index 2fdd1e5..dfb7207 100644 --- a/ypsu.cpp +++ b/ypsu.cpp @@ -862,6 +862,7 @@ int main(void) { //int n = 5000000; int n = 1000000; //int n = 100000; + //int n = 20001; //int n = 20000; //int n = 1000; //int n = 200; @@ -889,6 +890,7 @@ int main(void) { /* w = v; measure(inputtype, "ska", [&] { ska_sort(std::begin(w), std::end(w)); }); + */ w = v; measure(inputtype, "ska_copy", [&] { std::vector buf(w.size()); @@ -896,11 +898,14 @@ int main(void) { w.swap(buf); } }); - */ w = v; measure(inputtype, "magyar", [&] { MagyarSort::sort(&w[0], w.size()); }); assert(w == expected); + w = v; + measure(inputtype, "gptbuck", [&] { gpt_bucket_sort(&w[0], w.size()); }); + assert(w == expected); + /* w = v; measure(inputtype, "mormord", [&] { mormord_sort(&w[0], w.size()); }); @@ -925,9 +930,6 @@ int main(void) { measure(inputtype, "sp", [&] { spsort(&w[0], w.size()); }); assert(w == expected); w = v; - measure(inputtype, "gptbuck", [&] { gpt_bucket_sort(&w[0], w.size()); }); - assert(w == expected); - w = v; measure(inputtype, "gpt_qsort", [&] { gpt_quicksort(w); }); assert(w == expected); w = v;