Compare commits

..

2 Commits

Author SHA1 Message Date
Richard Thier
95c759b9e3 swab: fixed == to >= typo - now beats std::sort and is comparison sort fully! 2025-05-09 05:05:40 +02:00
Richard Thier
147ca60672 schwab insertion - but buggy from some previous at n=20 rand 2025-05-09 04:49:31 +02:00

View File

@ -14,6 +14,11 @@
#define SCHWAB_DELTA_THRESHOLD 32 #define SCHWAB_DELTA_THRESHOLD 32
#endif /* SCHWAB_DELTA_THRESHOLD */ #endif /* SCHWAB_DELTA_THRESHOLD */
/**
 * Insertion sort cut-off: schwab_sort hands a sub-range to insertion sort
 * instead of partitioning it when (high - low) is not greater than this value.
 * Can be overridden by defining the macro before this point.
 */
#ifndef SCHWAB_INSERTION_THRESHOLD
#define SCHWAB_INSERTION_THRESHOLD 64
#endif /* SCHWAB_INSERTION_THRESHOLD */
typedef uint32_t sch_rand_state; typedef uint32_t sch_rand_state;
/** Create rand state for schwab_sort using a seed - can give 0 if uninterested */ /** Create rand state for schwab_sort using a seed - can give 0 if uninterested */
@ -41,6 +46,22 @@ static inline void schwab_swap(uint32_t *a, uint32_t *b) {
*b = t; *b = t;
} }
/**
 * Simple insertion sort for small cases.
 *
 * Sorts the inclusive index range arr[low..high] in ascending order, in place.
 * Elements outside that range are neither read nor written.
 *
 * @param arr  Array holding the range to sort.
 * @param low  Index of the first element of the range.
 * @param high Index of the last element of the range (inclusive); nothing
 *             is done when high <= low.
 */
static inline void sch_insertion_sort(uint32_t *arr, int low, int high) {
	for (int i = low + 1; i <= high; ++i) {
		uint32_t key = arr[i];
		int j = i;
		/* Shift the elements of arr[low..i-1] that are greater than key */
		/* one position to the right. The scan stops at low so nothing in */
		/* front of the requested range is ever compared or overwritten. */
		while (j > low && arr[j - 1] > key) {
			arr[j] = arr[j - 1];
			--j;
		}
		arr[j] = key;
	}
}
/** /**
* 3-way partitioning, in middle all the pivot elements. * 3-way partitioning, in middle all the pivot elements.
* *
@ -166,7 +187,7 @@ static inline int schwab_partition(
arr[b3] = arr[b2]; arr[b3] = arr[b2];
arr[b2] = (where == 2) ? arr[b2] : arr[b1]; arr[b2] = (where == 2) ? arr[b2] : arr[b1];
arr[b1] = (where == 1) ? arr[b1] : arr[b0]; arr[b1] = (where >= 1) ? arr[b1] : arr[b0];
++b2; ++b2;
b1 += (where < 2); b1 += (where < 2);
@ -221,57 +242,64 @@ static inline void schwab_sort(
sch_rand_state *state) { sch_rand_state *state) {
/* Loop handles longest sub-sort-task which ensused log tree depth */ /* Loop handles longest sub-sort-task which ensused log tree depth */
/* Loop also handles start condition */
while(low < high) { while(low < high) {
int r0 = schwab_pick_pivot(state, (high + 1) - low) + low; if(high - low > SCHWAB_INSERTION_THRESHOLD) {
int r1 = schwab_pick_pivot(state, (high + 1) - low) + low; int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
uint32_t klo = array[r0]; int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
uint32_t khi = array[r1]; uint32_t klo = array[r0];
int plo = r0; uint32_t khi = array[r1];
int phi = r1; int plo = r0;
if(klo > khi) { int phi = r1;
uint32_t ktmp = klo; if(klo > khi) {
klo = khi; uint32_t ktmp = klo;
khi = ktmp; klo = khi;
khi = ktmp;
plo = r1; plo = r1;
phi = r0; phi = r0;
}
uint32_t kmid = klo + (khi - klo) / 2;
int pmid;
int needmid = schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
/* See where NOT to recurse to avoid worst case stack depth */
/* Rem.: These might be "not real" length but we only use them to comparisons */
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
int lolen = plo - low;
int hilen = high - phi;
/* Rewrite loop for worst subtask goal and recurse others! */
/* Let the branch predictor try to predict input data path */
/* Rem.: Best would be to check for biggest in all 4 block */
/* But that would complicate codes above this point! */
/* Rem.: Order of operations try to be a cache-friendly as */
/* possible, but had to put loops changes to the end */
if(lolen < hilen) {
schwab_sort(array, low, plo - 1, state);
if(needmid) {
schwab_sort(array, plo, pmid - 1, state);
schwab_sort(array, pmid, phi - 1, state);
} }
low = phi; uint32_t kmid = klo + (khi - klo) / 2;
/* high = high; */
int pmid;
int needmid = schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
/* See where NOT to recurse to avoid worst case stack depth */
/* Rem.: These might be "not real" length but we only use them to comparisons */
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
int lolen = plo - low;
int hilen = high - phi;
/* Rewrite loop for worst subtask goal and recurse others! */
/* Let the branch predictor try to predict input data path */
/* Rem.: Best would be to check for biggest in all 4 block */
/* But that would complicate codes above this point! */
/* Rem.: Order of operations try to be a cache-friendly as */
/* possible, but had to put loops changes to the end */
if(lolen < hilen) {
schwab_sort(array, low, plo - 1, state);
if(needmid) {
schwab_sort(array, plo, pmid - 1, state);
schwab_sort(array, pmid, phi - 1, state);
}
low = phi;
/* high = high; */
} else {
schwab_sort(array, phi, high, state);
if(needmid) {
schwab_sort(array, pmid, phi - 1, state);
schwab_sort(array, plo, pmid - 1, state);
}
/* low = low; */
high = plo - 1;
}
} else { } else {
schwab_sort(array, phi, high, state); /* Just do an insertion sort instead */
if(needmid) { sch_insertion_sort(array, low, high);
schwab_sort(array, pmid, phi - 1, state); return;
schwab_sort(array, plo, pmid - 1, state);
}
/* low = low; */
high = plo - 1;
} }
} }
} }