From 5775e6c2018c98aa6d300297cfc260de250c9049 Mon Sep 17 00:00:00 2001
From: Richard Thier
Date: Thu, 8 May 2025 21:47:30 +0200
Subject: [PATCH] schwab_sort but can endless loop

---
 qs.c          |  18 ++++++
 schwab_sort.h | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 179 insertions(+)
 create mode 100644 schwab_sort.h

diff --git a/qs.c b/qs.c
index da065cb..c86bd1a 100644
--- a/qs.c
+++ b/qs.c
@@ -4,6 +4,7 @@
 #include
 #include "qsort.h"
 #include "zssort.h"
+#include "schwab_sort.h"
 
 // function to print array elements
 void printArray(int array[], int size) {
@@ -102,6 +103,22 @@ void qs3_sp2() {
   printArray(data, n);
 }
 
+void schwab() {
+  #include "data.inc"
+
+  int n = sizeof(data) / sizeof(data[0]);
+
+  // memory junk is enough as a seed
+  uint32_t junk;
+  sch_rand_state rand = schwab_rand_state(junk);
+
+  // perform sort on data
+  schwab_sort(data, 0, n - 1, &rand);
+
+  printf("(schwab) Sorted array in ascending order: \n");
+  printArray(data, n);
+}
+
 int main() {
   qs();
   qsr();
@@ -109,5 +126,6 @@ int main() {
   qs3();
   qs3_sp();
   qs3_sp2();
+  schwab();
   return 0;
 }
diff --git a/schwab_sort.h b/schwab_sort.h
new file mode 100644
index 0000000..0142286
--- /dev/null
+++ b/schwab_sort.h
@@ -0,0 +1,161 @@
+#ifndef SWAB_SORT_H
+#define SWAB_SORT_H
+
+/* A fast quicksort-like new alg created in Csolnok, Hungary with:
+ * - 4-way partitioning with 0..5 copies (not swaps) per elem per run
+ * - ensured O(log2(n)) worst recursion depth
+ *
+ * LICENCE: CC-BY, 2025 May 08
+ * Author: Richárd István Thier (also author of the Magyarsort)
+ */
+
+#include <stdint.h> /* for uint32_t / uint64_t */
+
+typedef uint32_t sch_rand_state;
+
+/** Create rand state for schwab_sort using a seed - can be 0 if you do not care */
+static inline sch_rand_state schwab_rand_state(uint32_t seed) {
+    return seed;
+}
+
+/** 32-bit LCG for fast random generation - from my fastrand.h */
+static inline uint32_t schwab_lcg(sch_rand_state *state) {
+    *state = *state * 1664525u + 1013904223u;
+    return *state;
+}
+
+/** Get pivot index in [0, len-1] without modulus - from my fastrand.h */
+static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
+    uint32_t rand = schwab_lcg(state);
+    /* Multiply by len, take the upper 32 bits of the 64-bit result */
+    return (uint32_t)(((uint64_t)rand * len) >> 32);
+}
+
+/**
+ * 4-way partitioning
+ *
+ * Expects: arr[plo] <= arr[pmid] <= arr[phi]
+ * Results: arr[low..plo - 1] <= arr[plo..pmid - 1] <= arr[pmid..phi - 1] <= arr[phi..high]
+ *
+ * @param arr The array to partition
+ * @param low Inclusive smallest index.
+ * @param high Inclusive highest index.
+ * @param plo IN-OUT: in: index of the low pivot; out: exclusive end of the block of elements below the low pivot.
+ * @param pmid IN-OUT: in: index of the mid pivot; out: exclusive end of the block of elements below the mid pivot.
+ * @param phi IN-OUT: in: index of the high pivot; out: exclusive end of the block of elements below the high pivot.
+ */
+static inline void schwab_partition(
+        uint32_t *arr,
+        int low,
+        int high,
+        int *plo,
+        int *pmid,
+        int *phi) {
+
+    /* Grab pivot values (keys of partitioning) */
+    uint32_t klo = arr[*plo];
+    uint32_t kmid = arr[*pmid];
+    uint32_t khi = arr[*phi];
+
+    /* Exclusive end indices of the 4 "blocks" - b0 holds the smallest vals */
+    int b0 = low, b1 = low, b2 = low, b3 = low;
+
+    while(b3 < high + 1) {
+        /* I moved this check first so the hot code path handles constant / small-range data */
+        if(arr[b3] >= khi) {
+            ++b3;
+            continue;
+        }
+
+        /* TODO: should be a copy of the whole element when not just uint32s! */
+        uint32_t curr = arr[b3];
+
+        /* TODO: We can do "ILP-memcpy"s here:
+         *
+         * Key from b2->b3, value from b2->b3, key from b1->b2, value from b1... etc
+         * This is likely faster than calling a memcpy if we code this for not just uint32s!
+         */
+        if(curr < klo) {
+            arr[b3] = arr[b2];
+            arr[b2] = arr[b1];
+            arr[b1] = arr[b0];
+            arr[b0] = curr;
+            ++b0; ++b1; ++b2; ++b3;
+            continue;
+        }
+
+        if(curr < kmid) {
+            arr[b3] = arr[b2];
+            arr[b2] = arr[b1];
+            arr[b1] = curr;
+            ++b1; ++b2; ++b3;
+        } else {
+            arr[b3] = arr[b2];
+            arr[b2] = curr;
+            ++b2; ++b3;
+        }
+    }
+
+    /* Handle output vars as per the doc comment */
+    *plo = b0;
+    *pmid = b1;
+    *phi = b2;
+}
+
+/** 4-way quicksort-like alg that always needs at most O(log(n)) stack space */
+static inline void schwab_sort(
+        uint32_t *array,
+        int low,
+        int high,
+        sch_rand_state *state) {
+
+    /* The loop keeps the longest sub-sort-task, which ensures O(log2(n)) tree depth */
+    while(low < high) {
+        int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
+        int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
+
+        int plo = (r0 < r1) ? r0 : r1;
+        int phi = (r0 < r1) ? r1 : r0;
+        int pmid = schwab_pick_pivot(state, (phi + 1) - plo) + plo;
+
+        schwab_partition(array, low, high, &plo, &pmid, &phi);
+
+        /* See where NOT to recurse to avoid worst-case stack depth */
+        /* Rem.: These might not be the "real" lengths but we only use them for comparisons */
+        /* Rem.: The "real" lengths might be off-by-one but these are FASTER! */
+        int lolen = plo - low;
+        int lomidlen = pmid - plo;
+        int himidlen = phi - pmid;
+        int hilen = high - phi;
+        int lomax = (lolen > lomidlen) ? lolen : lomidlen;
+        int himax = (hilen > himidlen) ? hilen : himidlen;
+
+        /* Rewrite the loop bounds to the biggest subtask and recurse for the others! */
+        /* Let the branch predictor try to predict the input data path */
+        if(lomax < himax) {
+            schwab_sort(array, low, plo - 1, state);
+            schwab_sort(array, plo, pmid - 1, state);
+            if(hilen > himidlen) {
+                schwab_sort(array, pmid, phi - 1, state);
+                low = phi;
+                /* high = high; */
+            } else {
+                schwab_sort(array, phi, high, state);
+                low = pmid;
+                high = phi - 1;
+            }
+        } else {
+            schwab_sort(array, pmid, phi - 1, state);
+            schwab_sort(array, phi, high, state);
+            if(lolen < lomidlen) {
+                schwab_sort(array, low, plo - 1, state);
+                low = plo;
+                high = pmid - 1;
+            } else {
+                schwab_sort(array, plo, pmid - 1, state);
+                /* low = low; */
+                high = plo - 1;
+            }
+        }
+    }
+}
+
+#endif /* SWAB_SORT_H */
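
schwab_pick_pivot in the patch avoids a modulus: it widens the 32-bit random value to 64 bits, multiplies by len and keeps the upper 32 bits, which in effect treats the random value as a fraction in [0, 1) and scales it onto [0, len). Below is a standalone sketch of the same multiply-high mapping; the helper name map_to_range and the sample values are illustrative assumptions, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Same multiply-high trick as schwab_pick_pivot: r / 2^32 is a fraction
 * in [0, 1), so (r * len) >> 32 lands in [0, len) without any division. */
static uint32_t map_to_range(uint32_t r, uint32_t len) {
    return (uint32_t)(((uint64_t)r * len) >> 32);
}

int main(void) {
    /* Sample values from the low, middle and high end of the 32-bit range
     * map to indices spread across [0, 10). */
    uint32_t samples[] = { 0u, 123456789u, 2147483648u, 4294967295u };
    for (int i = 0; i < 4; ++i)
        printf("%u -> %u\n", (unsigned)samples[i],
               (unsigned)map_to_range(samples[i], 10u));
    return 0;
}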
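
Since the schwab() demo in qs.c pulls its input from data.inc, here is a minimal, self-contained sketch of how the new header can be driven on its own: seed a state, sort a uint32_t buffer, then check the result is non-decreasing. The buffer size, the seed and the reuse of the header's LCG as a data source are assumptions of this sketch, not part of the patch; also keep the subject line's warning in mind that this version can still loop endlessly on some inputs (for example long runs of equal keys).

#include <stdint.h>
#include <stdio.h>
#include "schwab_sort.h"

int main(void) {
    enum { N = 1000 };
    uint32_t buf[N];

    /* Seed explicitly here instead of relying on stack junk like qs.c does */
    sch_rand_state rs = schwab_rand_state(42u);

    /* Reuse the header's LCG just to generate some test data */
    for (int i = 0; i < N; ++i)
        buf[i] = schwab_lcg(&rs);

    schwab_sort(buf, 0, N - 1, &rs);   /* low/high are inclusive indices */

    /* Verify the output is non-decreasing */
    for (int i = 1; i < N; ++i) {
        if (buf[i - 1] > buf[i]) {
            printf("not sorted at index %d\n", i);
            return 1;
        }
    }
    printf("sorted %d values\n", N);
    return 0;
}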