schwab: fixed various bugs; now passes all of magyarsort's tests up to 20k elements, but is very slow on constant data for some reason - needs checking

This commit is contained in:
Richard Thier 2025-05-09 00:56:06 +02:00
parent 6fcf79bee3
commit c06f02bc94
2 changed files with 39 additions and 57 deletions

View File

@ -1,6 +1,7 @@
int data[] = {
8, 7, 2, 1, 0, 9, 6,1
/*8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,1,
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,
@ -9,5 +10,4 @@ int data[] = {
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,
8, 7, 2, 1, 0, 9, 6,*/
};

View File

@ -32,7 +32,7 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
/**
* 4-way partitioning
*
* Expects: arr[plo] <= arr[pmid] <= arr[phi]
* Expects: arr[plo] <= kmid <= arr[phi]
* Results: arr[low..plo - 1] <= arr[plo..pmid - 1] <= arr[pmid..phi - 1] <= arr[phi.. high]
*
* Also: The summed lengths of all result arrays are 1 less than the length of the start array.
@ -42,7 +42,8 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
* @param low Inclusive smallest index.
* @param high Inclusive highest index.
* @param plo IN-OUT: input low pivot, output index until elements <= low pivot.
* @param pmid IN-OUT: input mid pivot, output index until elements <= mid pivot.
* @param kmid IN: The mid splitting value (like a pivot value, but it need not actually exist in the array)
* @param pmid OUT: output index until elements <= mid pivot.
* @param phi IN-OUT: input high pivot, output index until elements <= high pivot.
*/
static inline void schwab_partition(
@ -50,9 +51,14 @@ static inline void schwab_partition(
int low,
int high,
int *plo,
uint32_t kmid,
int *pmid,
int *phi) {
/* Keys only - no element copy is made here */
uint32_t klo = arr[*plo];
uint32_t khi = arr[*phi];
/* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */
uint32_t tmphi = arr[*phi];
arr[*phi] = arr[high];
@ -61,11 +67,6 @@ static inline void schwab_partition(
/* Aren't inclusive end indices of 4 "blocks" - b0 is smallest vals */
int b0 = low, b1 = low, b2 = low, b3 = low;
/* Keys only - no element copy is made here */
uint32_t klo = arr[*plo];
uint32_t kmid = arr[*pmid];
uint32_t khi = arr[*phi];
while(b3 < high) {
/* This I moved to be first for hot code path for constant / smallrange */
if(arr[b3] >= khi) {
@ -106,8 +107,9 @@ static inline void schwab_partition(
/* This way we can return bigger index and by that */
/* this always removes an element per run at least */
tmphi = arr[b2];
arr[b2++] = arr[high];
arr[b2] = arr[high];
arr[high] = tmphi;
++b2;
/* Handle output vars as per doc comment */
*plo = b0;
@ -115,7 +117,7 @@ static inline void schwab_partition(
*phi = b2; /* Because of: [*] */
}
/** Always at most log(n) space needing 4-way quicksort-like alg */
/** Swabic-sort: it is somewhat similar to quicksort, but 4-way and tricky */
static inline void schwab_sort(
uint32_t *array,
int low,
@ -126,70 +128,50 @@ static inline void schwab_sort(
while(low < high) {
int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
uint32_t k0 = array[r0];
uint32_t k1 = array[r1];
uint32_t klo = array[r0];
uint32_t khi = array[r1];
int plo = r0;
int phi = r1;
if(k0 > k1) {
if(klo > khi) {
uint32_t ktmp = klo;
klo = khi;
khi = ktmp;
plo = r1;
phi = r0;
uint32_t tmp = k0;
k0 = k1;
k1 = tmp;
}
int r2 = schwab_pick_pivot(state, (phi + 1) - plo) + plo;
uint32_t k2 = array[r2];
int pmid = r2;
if(k2 < k0) {
int tmp = plo;
plo = pmid;
pmid = tmp;
} else if(k2 > k1) {
int tmp = phi;
phi = pmid;
pmid = tmp;
}
uint32_t kmid = klo + (khi - klo) / 2;
schwab_partition(array, low, high, &plo, &pmid, &phi);
int pmid;
schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
/* See where NOT to recurse to avoid worst case stack depth */
/* Rem.: These might not be the "real" lengths, but we only use them for comparisons */
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
int lolen = plo - low;
int lomidlen = pmid - plo;
int himidlen = phi - pmid;
int hilen = high -phi;
int lomax = (lolen > lomidlen) ? lolen : lomidlen;
int himax = (hilen > himidlen) ? hilen : himidlen;
int hilen = high - phi;
/* Rewrite loop for worst subtask goal and recurse others! */
/* Let the branch predictor try to predict input data path */
if(lomax < himax) {
/* Rem.: Best would be to check for biggest in all 4 block */
/* But that would complicate codes above this point! */
/* Rem.: The order of operations tries to be as cache-friendly as */
/* possible, but the loop-variable changes had to be put at the end */
if(lolen < hilen) {
schwab_sort(array, low, plo - 1, state);
schwab_sort(array, plo, pmid - 1, state);
if(hilen > himidlen) {
schwab_sort(array, pmid, phi - 1, state);
low = phi;
/* high = high; */
} else {
schwab_sort(array, phi, high, state);
low = pmid;
high = phi - 1;
}
} else {
schwab_sort(array, pmid, phi - 1, state);
low = phi;
/* high = high; */
} else {
schwab_sort(array, phi, high, state);
if(lolen < lomidlen) {
schwab_sort(array, low, plo - 1, state);
low = plo;
high = pmid - 1;
} else {
schwab_sort(array, plo, pmid - 1, state);
/* low = low; */
high = plo - 1;
}
schwab_sort(array, pmid, phi - 1, state);
schwab_sort(array, plo, pmid - 1, state);
/* low = low; */
high = plo - 1;
}
}
}