/** Exchange the two 32-bit values pointed to by a and b. */
static inline void schwab_swap(uint32_t *a, uint32_t *b) {
	uint32_t t = *a;
	*a = *b;
	*b = t;
}

/**
 * Single-pass 3-way partitioning around a pivot VALUE.
 *
 * After the call the inclusive range [low, high] is arranged as:
 *
 *   [low,  *plo)   elements <  pivotval
 *   [*plo, *phi)   elements == pivotval
 *   [*phi, high]   elements >  pivotval
 *
 * Any of the three parts may be empty. The pivot value does not need to be
 * present in the array. An empty range (low > high) touches no element and
 * yields *plo == low and *phi == high + 1.
 *
 * @param array The array to partition
 * @param low From where. (inclusive)
 * @param high Until where. (inclusive too!)
 * @param pivotval This value is used to partition the array.
 * @param plo OUT: first index of the pivot-equal block; everything below it
 *            (down to low) is smaller and may need more processing.
 * @param phi OUT: first index of the bigger block; everything from it
 *            (up to high) is bigger and may need more processing.
 */
static inline void schwab_partition3sp2(
		uint32_t *array,
		int low,
		int high,
		uint32_t pivotval,
		int *plo,
		int *phi) {

	/* Invariant for left: [low, il] holds only elements smaller than the pivot */
	int il = low - 1;
	/* Invariant for right: [ir, high] holds only elements bigger than the pivot */
	int ir = high + 1;
	/* Scan cursors; [il + 1, jl) and (jr, ir - 1] hold only pivot-equal elements */
	int jl = low;
	int jr = high;

	while(jl <= jr) {
		/*
		 * Handle left and find a wrongly placed (bigger) element.
		 * The bounds check MUST come first: with the element test first,
		 * array[high + 1] was read once the scan ran off the range.
		 */
		while((jl <= jr) && (array[jl] <= pivotval)) {
			int isNonPivot = (array[jl] != pivotval);
			int nonSameIndex = (il + 1 != jl);
			/* A smaller element goes right after the smaller block; */
			/* the pivot-equal element that sat there moves to jl. */
			if(isNonPivot & nonSameIndex) {
				schwab_swap(&array[il + 1], &array[jl]);
			}
			il += isNonPivot;
			++jl;
		}

		/*
		 * Handle right and find a wrongly placed (smaller) element.
		 * Same ordering rule: checking jl <= jr first avoids reading
		 * array[low - 1] when the scan runs off the bottom.
		 */
		while((jl <= jr) && (array[jr] >= pivotval)) {
			int isNonPivot = (array[jr] != pivotval);
			int nonSameIndex = (ir - 1 != jr);
			/* Mirror image of the left side for bigger elements */
			if(isNonPivot & nonSameIndex) {
				schwab_swap(&array[ir - 1], &array[jr]);
			}
			ir -= isNonPivot;
			--jr;
		}

		/* array[jl] is bigger and array[jr] is smaller: exchange them. */
		/* The next round of the inner loops files both into their blocks. */
		if(jl < jr) {
			schwab_swap(&array[jl], &array[jr]);
		}
	}

	/* Output the partition points */
	*plo = il + 1;
	*phi = ir;
}
+ * @param pmid OUT: output - see "Results:" + * @param phi IN-OUT: input high pivot, output - see "Results:" + * @returns 1 if there is need to process the mid two blocks! Otherwise 0. */ -static inline void schwab_partition( +static inline int schwab_partition( uint32_t *arr, int low, int high, @@ -59,6 +130,17 @@ static inline void schwab_partition( uint32_t klo = arr[*plo]; uint32_t khi = arr[*phi]; + /* Without this, constant and smallrange is very slooOOoow */ + if(khi - klo < SCHWAB_DELTA_THRESHOLD) { + /* Use three-way which defeats smallrange */ + /* Outer sort func also optimized for two sides */ + /* check for size for which recurse which not! */ + schwab_partition3sp2(arr, low, high, kmid, plo, phi); + + /* No need to process the midle two blocks - all pivot there */ + return 0; + } + /* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */ uint32_t tmphi = arr[*phi]; arr[*phi] = arr[high]; @@ -115,6 +197,9 @@ static inline void schwab_partition( *plo = b0; *pmid = b1; *phi = b2; /* Because of: [*] */ + + /* There are mid parts to process */ + return 1; } /** Swabic-sort its somewhat similar to quicksort but 4-way and tricky */ @@ -144,7 +229,7 @@ static inline void schwab_sort( uint32_t kmid = klo + (khi - klo) / 2; int pmid; - schwab_partition(array, low, high, &plo, kmid, &pmid, &phi); + int needmid = schwab_partition(array, low, high, &plo, kmid, &pmid, &phi); /* See where NOT to recurse to avoid worst case stack depth */ /* Rem.: These might be "not real" length but we only use them to comparisons */ @@ -160,15 +245,19 @@ static inline void schwab_sort( /* possible, but had to put loops changes to the end */ if(lolen < hilen) { schwab_sort(array, low, plo - 1, state); - schwab_sort(array, plo, pmid - 1, state); - schwab_sort(array, pmid, phi - 1, state); + if(needmid) { + schwab_sort(array, plo, pmid - 1, state); + schwab_sort(array, pmid, phi - 1, state); + } low = phi; /* high = high; */ } else { schwab_sort(array, phi, high, state); 
- schwab_sort(array, pmid, phi - 1, state); - schwab_sort(array, plo, pmid - 1, state); + if(needmid) { + schwab_sort(array, pmid, phi - 1, state); + schwab_sort(array, plo, pmid - 1, state); + } /* low = low; */ high = plo - 1; @@ -176,4 +265,4 @@ static inline void schwab_sort( } } -#endif /* SWAB_SORT_H */ +#endif /* SCHWAB_SORT_H */