Add schwab_sort, but it can still loop endlessly

This commit is contained in:
Richard Thier 2025-05-08 21:47:30 +02:00
parent d80a061240
commit 5775e6c201
2 changed files with 179 additions and 0 deletions

18
qs.c
View File

@ -4,6 +4,7 @@
#include <stdio.h> #include <stdio.h>
#include "qsort.h" #include "qsort.h"
#include "zssort.h" #include "zssort.h"
#include "schwab_sort.h"
// function to print array elements // function to print array elements
void printArray(int array[], int size) { void printArray(int array[], int size) {
@ -102,6 +103,22 @@ void qs3_sp2() {
printArray(data, n); printArray(data, n);
} }
/* Sorts the sample data set from data.inc with schwab_sort and prints it. */
void schwab() {
#include "data.inc"
	int n = sizeof(data) / sizeof(data[0]);

	/*
	 * Seed the pivot generator with a fixed value. Reading an uninitialized
	 * local as "free entropy" is undefined behaviour in C, and the header
	 * documents 0 as an acceptable seed.
	 */
	sch_rand_state rand = schwab_rand_state(0);

	/* perform sort on data (both bounds are inclusive indices) */
	schwab_sort(data, 0, n - 1, &rand);

	printf("(schwab) Sorted array in ascending order: \n");
	printArray(data, n);
}
int main() { int main() {
qs(); qs();
qsr(); qsr();
@ -109,5 +126,6 @@ int main() {
qs3(); qs3();
qs3_sp(); qs3_sp();
qs3_sp2(); qs3_sp2();
schwab();
return 0; return 0;
} }

161
schwab_sort.h Normal file
View File

@ -0,0 +1,161 @@
#ifndef SWAB_SORT_H
#define SWAB_SORT_H
/* A fast quicksort-like new alg created in Csolnok, Hungary with:
* - 4-way partitioning with 0..5 copies (not swaps) per elem per run
* - ensured O(log2(n)) worst recursion depth
*
* LICENCE: CC-BY, 2025 May 08
* Author: Richárd István Thier (also author of the Magyarsort)
*/
/* State of the pivot-picking random generator: a single 32-bit word. */
typedef uint32_t sch_rand_state;

/**
 * Build the initial generator state for schwab_sort from a seed.
 *
 * The seed is used verbatim as the state; pass 0 if you do not care.
 *
 * @param seed Any 32-bit value.
 * @return The initial state, equal to seed.
 */
static inline sch_rand_state schwab_rand_state(uint32_t seed) {
	sch_rand_state initial = seed;
	return initial;
}
/**
 * Advance the 32-bit linear congruential generator by one step.
 *
 * The new state is old * 1664525 + 1013904223, wrapping modulo 2^32.
 *
 * @param state IN-OUT: generator state, overwritten with the new value.
 * @return The new state, which doubles as the random number.
 */
static inline uint32_t schwab_lcg(sch_rand_state *state) {
	const uint32_t multiplier = 1664525u;
	const uint32_t increment = 1013904223u;
	uint32_t next = (*state) * multiplier + increment;

	*state = next;
	return next;
}
/**
 * Draw a random offset below len without using a modulus.
 *
 * The next 32-bit random number is multiplied by len in 64-bit arithmetic and
 * the upper 32 bits of the product are returned, i.e. floor(rand * len / 2^32).
 * For len > 0 that value lies in [0, len-1]; for len == 0 it is 0.
 *
 * @param state IN-OUT: generator state, advanced by one step.
 * @param len   Number of candidate offsets.
 * @return The chosen offset.
 */
static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
	uint64_t scaled = (uint64_t)schwab_lcg(state) * (uint64_t)len;

	return (uint32_t)(scaled >> 32);
}
/**
 * 4-way in-place partitioning of arr[low..high] around three pivot keys.
 *
 * The keys are read up front: klo = arr[*plo], kmid = arr[*pmid],
 * khi = arr[*phi]. The intended input is klo <= kmid <= khi.
 *
 * Every element is classified in this order:
 *   - key >= khi            -> block 3 (stays where it is)
 *   - otherwise key < klo   -> block 0
 *   - otherwise key < kmid  -> block 1
 *   - otherwise             -> block 2
 *
 * On return the blocks are laid out as
 *   arr[low..plo-1], arr[plo..pmid-1], arr[pmid..phi-1], arr[phi..high]
 * and every element of a block is smaller than every element of the blocks
 * to its right.
 *
 * @param arr  The array to partition.
 * @param low  Inclusive smallest index.
 * @param high Inclusive highest index.
 * @param plo  IN-OUT: in, index of the low pivot; out, first index of block 1.
 * @param pmid IN-OUT: in, index of the mid pivot; out, first index of block 2.
 * @param phi  IN-OUT: in, index of the high pivot; out, first index of block 3.
 */
static inline void schwab_partition(
		uint32_t *arr,
		int low,
		int high,
		int *plo,
		int *pmid,
		int *phi) {
	/* Pivot keys, captured before any element gets moved */
	const uint32_t key_lo = arr[*plo];
	const uint32_t key_mid = arr[*pmid];
	const uint32_t key_hi = arr[*phi];

	/* One-past-the-end indices of blocks 0, 1 and 2 (block 3 ends at scan) */
	int end0 = low;
	int end1 = low;
	int end2 = low;

	for(int scan = low; scan <= high; ++scan) {
		/* Hot path for constant / small-range input: already in block 3 */
		if(arr[scan] >= key_hi) {
			continue;
		}

		/* TODO: should be a copy of the whole element when not just uint32s! */
		uint32_t value = arr[scan];

		/*
		 * Rotate the first element of each block to the right of the target
		 * block into the slot freed by its neighbour, then drop the value in
		 * the slot that remains free.
		 * TODO: for key+value elements these copies can be interleaved
		 * ("ILP-memcpy"), which is likely faster than calling memcpy.
		 */
		int slot = end2;
		arr[scan] = arr[end2];
		if(value < key_lo || value < key_mid) {
			arr[end2] = arr[end1];
			slot = end1;
			if(value < key_lo) {
				arr[end1] = arr[end0];
				slot = end0;
				++end0;
			}
			++end1;
		}
		arr[slot] = value;
		++end2;
	}

	/* Report the block boundaries as described in the doc comment */
	*plo = end0;
	*pmid = end1;
	*phi = end2;
}
/**
 * Sort array[low..high] ascending with a 4-way quicksort-like algorithm.
 *
 * Each round samples three random pivots, partitions the range into four
 * blocks, recurses into three of them and continues the loop on the remaining
 * one, chosen as the larger block of the larger half, to keep the recursion
 * depth logarithmic.
 *
 * @param array The array to sort in place.
 * @param low   Inclusive smallest index.
 * @param high  Inclusive highest index; nothing happens when high <= low.
 * @param state IN-OUT: random generator state used for pivot selection.
 */
static inline void schwab_sort(
		uint32_t *array,
		int low,
		int high,
		sch_rand_state *state) {
	/* Loop handles the longest sub-sort-task, which ensures log tree depth */
	while(low < high) {
		uint32_t len = (uint32_t)(high - low) + 1u;
		int plo = (int)schwab_pick_pivot(state, len) + low;
		int pmid = (int)schwab_pick_pivot(state, len) + low;
		int phi = (int)schwab_pick_pivot(state, len) + low;

		/*
		 * schwab_partition expects array[plo] <= array[pmid] <= array[phi],
		 * so order the three sampled indices by KEY (not by index).
		 */
		int tmp;
		if(array[plo] > array[pmid]) { tmp = plo; plo = pmid; pmid = tmp; }
		if(array[pmid] > array[phi]) { tmp = pmid; pmid = phi; phi = tmp; }
		if(array[plo] > array[pmid]) { tmp = plo; plo = pmid; pmid = tmp; }

		/* Remember the high key: the partition overwrites the pivot indices */
		uint32_t khi = array[phi];

		schwab_partition(array, low, high, &plo, &pmid, &phi);

		if(phi == low) {
			/*
			 * Every element is >= khi, so khi is the minimum of the range and
			 * the partition made no progress (e.g. all keys equal). Looping
			 * on the unchanged range would never terminate. Instead gather
			 * the keys equal to the minimum at the front, where they are in
			 * their final place, and carry on with the strictly larger rest.
			 * At least the pivot itself is gathered, so the range shrinks.
			 */
			int eq = low;
			for(int i = low; i <= high; ++i) {
				if(array[i] == khi) {
					uint32_t moved = array[eq];
					array[eq] = array[i];
					array[i] = moved;
					++eq;
				}
			}
			low = eq;
			continue;
		}

		/* See where NOT to recurse to avoid worst case stack depth */
		/* Rem.: hilen is one less than the real length; these values are only compared */
		int lolen = plo - low;
		int lomidlen = pmid - plo;
		int himidlen = phi - pmid;
		int hilen = high - phi;

		int lomax = (lolen > lomidlen) ? lolen : lomidlen;
		int himax = (hilen > himidlen) ? hilen : himidlen;

		/* Keep looping on the biggest subtask and recurse into the others */
		if(lomax < himax) {
			schwab_sort(array, low, plo - 1, state);
			schwab_sort(array, plo, pmid - 1, state);
			if(hilen > himidlen) {
				schwab_sort(array, pmid, phi - 1, state);
				low = phi;
				/* high stays as is */
			} else {
				schwab_sort(array, phi, high, state);
				low = pmid;
				high = phi - 1;
			}
		} else {
			schwab_sort(array, pmid, phi - 1, state);
			schwab_sort(array, phi, high, state);
			if(lolen < lomidlen) {
				schwab_sort(array, low, plo - 1, state);
				low = plo;
				high = pmid - 1;
			} else {
				schwab_sort(array, plo, pmid - 1, state);
				/* low stays as is */
				high = plo - 1;
			}
		}
	}
}
#endif /* SWAB_SORT_H */