From 85aaf4b1a10745f87aed916cc3563cab81d684f5 Mon Sep 17 00:00:00 2001
From: Richard Thier
Date: Fri, 9 May 2025 01:10:12 +0200
Subject: [PATCH] testing schwab_sort

Add top2_partition.c, a standalone demo that partitions a uint32_t array
in place into four groups keyed by the top two bits of each element and
prints the array before and after the partition.

Hook schwab_sort into the ypsu.cpp benchmark: include
qsort/schwab_sort.h, add the do_schwab() wrapper and measure it right
after neoqs. The closing "*/" is moved past the qsr3 measurement, so
qsr3 now sits inside the commented-out region.

do_schwab() passes a fixed non-zero value to schwab_rand_state() rather
than an uninitialized local, because reading an uninitialized local is
undefined behaviour.
---
 top2_partition.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++
 ypsu.cpp         | 15 +++++++-
 2 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 top2_partition.c

diff --git a/top2_partition.c b/top2_partition.c
new file mode 100644
index 0000000..6eb4699
--- /dev/null
+++ b/top2_partition.c
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <stdint.h>
+
+/* Top two bits of a 32-bit value, as a number in 0..3. */
+#define TOP2(x) ((x) >> 30)
+
+/* Exchange the two values pointed to by a and b. */
+static void swap(uint32_t* a, uint32_t* b) {
+    uint32_t tmp = *a;
+    *a = *b;
+    *b = tmp;
+}
+
+/*
+ * Reorder arr[0..n) in place, in one pass, so that elements are grouped by
+ * their top two bits in ascending order: 00, then 01, then 10, then 11.
+ * The partition is not stable.
+ *
+ * Loop invariant: [0,b0) holds 00, [b0,b1) holds 01, [b1,b2) holds 10,
+ * [b2,b3) holds 11 and [b3,n) has not been examined yet.
+ */
+// TODO: instead of swaps, we need a single tmp var + only memcpys!!!
+// TODO: we can do "ILP-memcpy": key from b2->b3, value from b2->b3, key from b1->b2, value from b1...
+// Rem.: The latter is faster likely than calling memcpy function even though its simd-optimized...
+void partition_top2(uint32_t* arr, int n) {
+    int b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+
+    while (b3 < n) {
+        uint32_t val = arr[b3];
+        uint32_t top = TOP2(val);
+
+        if (top == 0) {
+            // Cascade: swap into b2, b1, b0
+            swap(&arr[b3], &arr[b2]);
+            swap(&arr[b2], &arr[b1]);
+            swap(&arr[b1], &arr[b0]);
+            b0++; b1++; b2++;
+        } else if (top == 1) {
+            // Cascade: swap into b2, b1
+            swap(&arr[b3], &arr[b2]);
+            swap(&arr[b2], &arr[b1]);
+            b1++; b2++;
+        } else if (top == 2) {
+            // Swap into b2
+            swap(&arr[b3], &arr[b2]);
+            b2++;
+        }
+        // else (top == 3), do nothing
+
+        b3++;
+    }
+}
+
+int main(void) {
+    uint32_t arr[] = {
+        0x40000001, // top 2 bits = 01
+        0x00000002, // 00
+        0xC0000003, // 11
+        0x80000004, // 10
+        0x00000005, // 00
+        0x40000006, // 01
+        0xC0000007, // 11
+        0x80000008, // 10
+    };
+
+    int n = sizeof(arr) / sizeof(arr[0]);
+
+    printf("Before:\n");
+    for (int i = 0; i < n; ++i)
+        printf("0x%08X ", arr[i]);
+    printf("\n");
+
+    // Optional: Show top 2 bits for verification
+    printf("\nTop 2 bits:\n");
+    for (int i = 0; i < n; ++i)
+        printf("%u ", TOP2(arr[i]));
+    printf("\n");
+
+    partition_top2(arr, n);
+
+    printf("\nAfter:\n");
+    for (int i = 0; i < n; ++i)
+        printf("0x%08X ", arr[i]);
+    printf("\n");
+
+    // Optional: Show top 2 bits for verification
+    printf("\nTop 2 bits:\n");
+    for (int i = 0; i < n; ++i)
+        printf("%u ", TOP2(arr[i]));
+    printf("\n");
+
+    return 0;
+}
diff --git a/ypsu.cpp b/ypsu.cpp
index a1fd6f2..880d40c 100644
--- a/ypsu.cpp
+++ b/ypsu.cpp
@@ -18,6 +18,7 @@
 #include "thiersort.h"
 #include "qsort/qsort.h"
 #include "qsort/zssort.h"
+#include "qsort/schwab_sort.h"
 #include "qsort/chatgpt_qs.h"
 
 // #define MAGYAR_SORT_DEFAULT_REUSE
@@ -199,6 +200,14 @@ static inline void do_neoqs(uint32_t *a, int n) noexcept {
   neoqs(a, 0, n - 1, &state);
 }
 
+/** schwab: runs schwab_sort over a[0..n-1] */
+static inline void do_schwab(uint32_t *a, int n) noexcept {
+  assert(n * uint32_t(sizeof(a[0])) <= INT_MAX);
+  uint32_t junk = 0x9E3779B9u; // fixed non-zero value; reading an uninitialized local is UB
+  sch_rand_state state = schwab_rand_state(junk);
+  schwab_sort(a, 0, n - 1, &state);
+}
+
 // mormord — Today at 2:27 AM
 // 1 2 2 2 3
 //
@@ -924,10 +933,10 @@ int main(void) {
   w = v;
   measure(inputtype, "zsr", [&] { do_zsr(&w[0], w.size()); });
   assert(w == expected);
-  */
   w = v;
   measure(inputtype, "qsr3", [&] { do_qsr3(&w[0], w.size()); });
   assert(w == expected);
+  */
   w = v;
   measure(inputtype, "zsr3", [&] { do_zsr3(&w[0], w.size()); });
   assert(w == expected);
@@ -950,6 +959,10 @@ int main(void) {
   measure(inputtype, "neoqs", [&] { do_neoqs(&w[0], w.size()); });
   assert(w == expected);
 
+  w = v;
+  measure(inputtype, "schwab", [&] { do_schwab(&w[0], w.size()); });
+  assert(w == expected);
+
   /*
   w = v;
   measure(inputtype, "magbuck", [&] { magyar_bucket_sort(&w[0], w.size()); });