Compare commits
No commits in common. "7d1d93a89c32444be7d3eca86af717ea236647fe" and "c06f02bc9446e4e1be54b1767980ac8138074203" have entirely different histories.
7d1d93a89c
...
c06f02bc94
122
schwab_sort.h
122
schwab_sort.h
@ -1,19 +1,14 @@
|
|||||||
#ifndef SCHWAB_SORT_H
|
#ifndef SWAB_SORT_H
|
||||||
#define SCHWAB_SORT_H
|
#define SWAB_SORT_H
|
||||||
|
|
||||||
/* A fast quicksort-like new alg created in Csolnok, Hungary with:
|
/* A fast quicksort-like new alg created in Csolnok, Hungary with:
|
||||||
* - 4-way partitioning with 0..5 copies (not swaps) per elem per run
|
* - 4-way partitioning with 0..5 copies (not swaps) per elem per run
|
||||||
* - ensured O(log2(n)) worst recursion depth
|
* - ensured O(log2(n)) worst recursion depth
|
||||||
*
|
*
|
||||||
* LICENCE: CC-BY, 2025 May 08-09
|
* LICENCE: CC-BY, 2025 May 08
|
||||||
* Author: Richárd István Thier (also author of the Magyarsort)
|
* Author: Richárd István Thier (also author of the Magyarsort)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** 3-way optimization for smallrange and const - 0 turns this off! */
|
|
||||||
#ifndef SCHWAB_DELTA_THRESHOLD
|
|
||||||
#define SCHWAB_DELTA_THRESHOLD 32
|
|
||||||
#endif /* SCHWAB_DELTA_THRESHOLD */
|
|
||||||
|
|
||||||
typedef uint32_t sch_rand_state;
|
typedef uint32_t sch_rand_state;
|
||||||
|
|
||||||
/** Create rand state for schwab_sort using a seed - can give 0 if uninterested */
|
/** Create rand state for schwab_sort using a seed - can give 0 if uninterested */
|
||||||
@ -34,71 +29,6 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
|
|||||||
return (uint32_t)(((uint64_t)rand * len) >> 32);
|
return (uint32_t)(((uint64_t)rand * len) >> 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Exchange the two uint32_t values that a and b point to. */
static inline void schwab_swap(uint32_t *a, uint32_t *b) {
	uint32_t t = *a;
	*a = *b;
	*b = t;
}

/**
 * 3-way partitioning: smaller elements go left, bigger elements go right
 * and all elements equal to the pivot value end up in the middle.
 *
 * Single-pass version 2.0, taken from qsort.h
 *
 * On return:
 * - arr[low .. *plo - 1] holds the elements <  pivotval
 * - arr[*plo .. *phi - 1] holds the elements == pivotval
 * - arr[*phi .. high]     holds the elements >  pivotval
 *
 * @param array The array to partition
 * @param low First index of the range (inclusive)
 * @param high Last index of the range (inclusive too!)
 * @param pivotval This value is used to partition the array.
 * @param plo OUT: one past the last element smaller than pivotval.
 * @param phi OUT: index of the first element bigger than pivotval.
 */
static inline void schwab_partition3sp2(
		uint32_t *array,
		int low,
		int high,
		uint32_t pivotval,
		int *plo,
		int *phi) {

	/* Invariant for left: last index where smaller (than pivot) elements lay */
	int il = (low - 1);
	/* Invariant for right: first index (from top) where bigger elements lay */
	int ir = (high + 1);
	/* Scan cursors moving towards each other from both ends */
	int jl = low;
	int jr = high;

	while(jl <= jr) {
		/*
		 * Handle left and find wrongly placed element.
		 * The bounds test must come first: otherwise array[high + 1]
		 * would be read when every element is <= pivotval.
		 */
		while((jl <= jr) && (array[jl] <= pivotval)) {
			int isNonPivot = (array[jl] != pivotval);
			int nonSameIndex = (il + 1 != jl);
			/* Move a smaller element before the pivot-equal run */
			if(isNonPivot & nonSameIndex)
				schwab_swap(&array[il + 1], &array[jl]);
			il += isNonPivot;
			++jl;
		}

		/*
		 * Handle right and find wrongly placed element.
		 * Same ordering: the bounds test guards against reading
		 * array[low - 1] when every element is >= pivotval.
		 */
		while((jl <= jr) && (array[jr] >= pivotval)) {
			int isNonPivot = (array[jr] != pivotval);
			int nonSameIndex = (ir - 1 != jr);
			/* Move a bigger element behind the pivot-equal run */
			if(isNonPivot & nonSameIndex)
				schwab_swap(&array[ir - 1], &array[jr]);
			ir -= isNonPivot;
			--jr;
		}

		/* Swap the two found elements that are wrongly placed */
		if(jl < jr) schwab_swap(&array[jl], &array[jr]);
	}

	/* Output the partition points */
	*plo = il + 1; /* XXX: changed from qsort.h to +1 here! */
	*phi = ir;
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 4-way partitioning
|
* 4-way partitioning
|
||||||
*
|
*
|
||||||
@ -111,13 +41,12 @@ static inline void schwab_partition3sp2(
|
|||||||
* @param arr The array to partition
|
* @param arr The array to partition
|
||||||
* @param low Inclusive smallest index.
|
* @param low Inclusive smallest index.
|
||||||
* @param high Inclusive highest index.
|
* @param high Inclusive highest index.
|
||||||
* @param plo IN-OUT: input low pivot, output - see "Results:"
|
* @param plo IN-OUT: input low pivot, output index until elements <= low pivot.
|
||||||
* @param kmid IN: The mid splitting value (like a pivot value, but can be imaginary nonexistent)
|
* @param kmid IN: The mid splitting value (like a pivot value, but can be imaginary nonexistent)
|
||||||
* @param pmid OUT: output - see "Results:"
|
* @param pmid OUT: output index until elements <= mid pivot.
|
||||||
* @param phi IN-OUT: input high pivot, output - see "Results:"
|
* @param phi IN-OUT: input high pivot, output index until elements <= high pivot.
|
||||||
* @returns 1 if there is need to process the mid two blocks! Otherwise 0.
|
|
||||||
*/
|
*/
|
||||||
static inline int schwab_partition(
|
static inline void schwab_partition(
|
||||||
uint32_t *arr,
|
uint32_t *arr,
|
||||||
int low,
|
int low,
|
||||||
int high,
|
int high,
|
||||||
@ -130,28 +59,18 @@ static inline int schwab_partition(
|
|||||||
uint32_t klo = arr[*plo];
|
uint32_t klo = arr[*plo];
|
||||||
uint32_t khi = arr[*phi];
|
uint32_t khi = arr[*phi];
|
||||||
|
|
||||||
/* Without this, constant and smallrange is very slooOOoow */
|
|
||||||
if(khi - klo < SCHWAB_DELTA_THRESHOLD) {
|
|
||||||
/* Use three-way which defeats smallrange */
|
|
||||||
/* Outer sort func also optimized for two sides */
|
|
||||||
/* check for size for which recurse which not! */
|
|
||||||
schwab_partition3sp2(arr, low, high, kmid, plo, phi);
|
|
||||||
|
|
||||||
/* No need to process the middle two blocks - all pivot there */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */
|
/* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */
|
||||||
uint32_t tmphi = arr[*phi];
|
uint32_t tmphi = arr[*phi];
|
||||||
arr[*phi] = arr[high];
|
arr[*phi] = arr[high];
|
||||||
arr[high] = tmphi;
|
arr[high] = tmphi;
|
||||||
|
|
||||||
/* Aren't inclusive end indices of 4 "blocks" - b0 is smallest vals */
|
/* Aren't inclusive end indices of 4 "blocks" - b0 is smallest vals */
|
||||||
int b0 = low, b1 = low, b2 = low;
|
int b0 = low, b1 = low, b2 = low, b3 = low;
|
||||||
|
|
||||||
for(int b3 = low; b3 < high; ++b3) {
|
while(b3 < high) {
|
||||||
/* This I moved to be first to avoid unnecessary curr copy below */
|
/* This I moved to be first for hot code path for constant / smallrange */
|
||||||
if(arr[b3] >= khi) {
|
if(arr[b3] >= khi) {
|
||||||
|
++b3;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -168,7 +87,7 @@ static inline int schwab_partition(
|
|||||||
arr[b2] = arr[b1];
|
arr[b2] = arr[b1];
|
||||||
arr[b1] = arr[b0];
|
arr[b1] = arr[b0];
|
||||||
arr[b0] = curr;
|
arr[b0] = curr;
|
||||||
++b0; ++b1; ++b2;
|
++b0; ++b1; ++b2; ++b3;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,11 +95,11 @@ static inline int schwab_partition(
|
|||||||
arr[b3] = arr[b2];
|
arr[b3] = arr[b2];
|
||||||
arr[b2] = arr[b1];
|
arr[b2] = arr[b1];
|
||||||
arr[b1] = curr;
|
arr[b1] = curr;
|
||||||
++b1; ++b2;
|
++b1; ++b2; ++b3;
|
||||||
} else {
|
} else {
|
||||||
arr[b3] = arr[b2];
|
arr[b3] = arr[b2];
|
||||||
arr[b2] = curr;
|
arr[b2] = curr;
|
||||||
++b2;
|
++b2; ++b3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -195,10 +114,7 @@ static inline int schwab_partition(
|
|||||||
/* Handle output vars as per doc comment */
|
/* Handle output vars as per doc comment */
|
||||||
*plo = b0;
|
*plo = b0;
|
||||||
*pmid = b1;
|
*pmid = b1;
|
||||||
*phi = b2;
|
*phi = b2; /* Because of: [*] */
|
||||||
|
|
||||||
/* There are mid parts to process */
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Swabic-sort: it's somewhat similar to quicksort, but 4-way and tricky */
|
/** Swabic-sort: it's somewhat similar to quicksort, but 4-way and tricky */
|
||||||
@ -228,7 +144,7 @@ static inline void schwab_sort(
|
|||||||
uint32_t kmid = klo + (khi - klo) / 2;
|
uint32_t kmid = klo + (khi - klo) / 2;
|
||||||
|
|
||||||
int pmid;
|
int pmid;
|
||||||
int needmid = schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
|
schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
|
||||||
|
|
||||||
/* See where NOT to recurse to avoid worst case stack depth */
|
/* See where NOT to recurse to avoid worst case stack depth */
|
||||||
/* Rem.: These might be "not real" length but we only use them to comparisons */
|
/* Rem.: These might be "not real" length but we only use them to comparisons */
|
||||||
@ -244,19 +160,15 @@ static inline void schwab_sort(
|
|||||||
/* possible, but had to put loops changes to the end */
|
/* possible, but had to put loops changes to the end */
|
||||||
if(lolen < hilen) {
|
if(lolen < hilen) {
|
||||||
schwab_sort(array, low, plo - 1, state);
|
schwab_sort(array, low, plo - 1, state);
|
||||||
if(needmid) {
|
|
||||||
schwab_sort(array, plo, pmid - 1, state);
|
schwab_sort(array, plo, pmid - 1, state);
|
||||||
schwab_sort(array, pmid, phi - 1, state);
|
schwab_sort(array, pmid, phi - 1, state);
|
||||||
}
|
|
||||||
|
|
||||||
low = phi;
|
low = phi;
|
||||||
/* high = high; */
|
/* high = high; */
|
||||||
} else {
|
} else {
|
||||||
schwab_sort(array, phi, high, state);
|
schwab_sort(array, phi, high, state);
|
||||||
if(needmid) {
|
|
||||||
schwab_sort(array, pmid, phi - 1, state);
|
schwab_sort(array, pmid, phi - 1, state);
|
||||||
schwab_sort(array, plo, pmid - 1, state);
|
schwab_sort(array, plo, pmid - 1, state);
|
||||||
}
|
|
||||||
|
|
||||||
/* low = low; */
|
/* low = low; */
|
||||||
high = plo - 1;
|
high = plo - 1;
|
||||||
@ -264,4 +176,4 @@ static inline void schwab_sort(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* SCHWAB_SORT_H */
|
#endif /* SWAB_SORT_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user