Compare commits
No commits in common. "21c9d52138141c3e820533f556c5f26a9491f28d" and "c1d152c6f9477be151177c04f2914f8d624962be" have entirely different histories.
21c9d52138
...
c1d152c6f9
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
/** Below this many elements we do insertion sort */
|
/** Below this many elements we do insertion sort */
|
||||||
#ifndef SCHWAB_SELECTION_THRESHOLD
|
#ifndef SCHWAB_SELECTION_THRESHOLD
|
||||||
#define SCHWAB_SELECTION_THRESHOLD 8
|
#define SCHWAB_SELECTION_THRESHOLD 16
|
||||||
#endif /* SCHWAB_SELECTION_THRESHOLD */
|
#endif /* SCHWAB_SELECTION_THRESHOLD */
|
||||||
|
|
||||||
typedef uint32_t sch_rand_state;
|
typedef uint32_t sch_rand_state;
|
||||||
@ -51,75 +51,8 @@ static inline void schwab_swap(uint32_t *a, uint32_t *b) {
|
|||||||
*b = t;
|
*b = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Branchless conditional swap */
|
|
||||||
#define SCH_CSWAP(a, b) do { \
|
|
||||||
uint32_t _x = (a), _y = (b); \
|
|
||||||
uint32_t _gt = -(_x > _y); \
|
|
||||||
uint32_t _tmp = (_x ^ _y) & _gt; \
|
|
||||||
(a) = _x ^ _tmp; \
|
|
||||||
(b) = _y ^ _tmp; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tiny unrolled register-heapsort of exactly 5 elements.
|
|
||||||
*
|
|
||||||
* This heap:
|
|
||||||
*
|
|
||||||
* a[0]
|
|
||||||
* / \
|
|
||||||
* a[1] a[2]
|
|
||||||
* / \
|
|
||||||
* a[3] a[4]
|
|
||||||
*
|
|
||||||
* @param a The array
|
|
||||||
*/
|
|
||||||
void sch_hsort5(uint32_t* a) {
|
|
||||||
/* Build max heap */
|
|
||||||
SCH_CSWAP(a[1], a[3]);
|
|
||||||
SCH_CSWAP(a[1], a[4]);
|
|
||||||
|
|
||||||
/* Heapify(0) */
|
|
||||||
/* Max child = (a[2] > a[1]) ? 2 : 1; */
|
|
||||||
/* SCH_CSWAP(a[0], a[maxChild]) */
|
|
||||||
uint32_t cmp = -(a[2] > a[1]);
|
|
||||||
uint32_t i = (1 ^ 2) & cmp ^ 1;
|
|
||||||
/* Right selects a[2] if cmp==1, else 1 */
|
|
||||||
SCH_CSWAP(a[0], a[i]);
|
|
||||||
|
|
||||||
/* Sort phase */
|
|
||||||
|
|
||||||
/* 1st max to end */
|
|
||||||
SCH_CSWAP(a[0], a[4]);
|
|
||||||
/* heapify a[0..3] */
|
|
||||||
cmp = -(a[2] > a[1]);
|
|
||||||
i = (1 ^ 2) & cmp ^ 1;
|
|
||||||
SCH_CSWAP(a[0], a[i]);
|
|
||||||
|
|
||||||
/* 2nd max to end */
|
|
||||||
SCH_CSWAP(a[0], a[3]);
|
|
||||||
/* heapify a[0..2] */
|
|
||||||
cmp = -(a[2] > a[1]);
|
|
||||||
i = (1 ^ 2) & cmp ^ 1;
|
|
||||||
SCH_CSWAP(a[0], a[i]);
|
|
||||||
|
|
||||||
/* 3rd max to end */
|
|
||||||
SCH_CSWAP(a[0], a[2]);
|
|
||||||
|
|
||||||
/* Final two */
|
|
||||||
SCH_CSWAP(a[0], a[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Simple insertion sort for small cases */
|
/** Simple insertion sort for small cases */
|
||||||
inline void sch_insertion_sort(uint32_t *arr, int low, int high) {
|
inline void sch_insertion_sort(uint32_t *arr, int low, int high) {
|
||||||
/* Dual heapsort5 probably helps insertion speed */
|
|
||||||
/* This is sane, because insertion benefits from */
|
|
||||||
/* data being "basically nearly sorted" as input */
|
|
||||||
if(high - low > 10) {
|
|
||||||
sch_hsort5(&arr[0]);
|
|
||||||
sch_hsort5(&arr[5]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* "Real" insertion sort part comes here */
|
|
||||||
for(int i = low + 1; i <= high; ++i) {
|
for(int i = low + 1; i <= high; ++i) {
|
||||||
uint32_t key = arr[i];
|
uint32_t key = arr[i];
|
||||||
|
|
||||||
@ -135,6 +68,24 @@ inline void sch_insertion_sort(uint32_t *arr, int low, int high) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Simple insertion sort for small cases v2 - not necessarily better */
|
||||||
|
inline void sch_insertion_sort2(uint32_t* arr, int low, int high) {
|
||||||
|
for(int i = low + 1; i <= high; ++i) {
|
||||||
|
uint32_t key = arr[i];
|
||||||
|
|
||||||
|
/* Separate load and compare to expose ILP */
|
||||||
|
int j = i;
|
||||||
|
#pragma GCC unroll 2
|
||||||
|
while(j > 0) {
|
||||||
|
uint32_t prev = arr[j - 1];
|
||||||
|
if (prev <= key) break;
|
||||||
|
arr[j] = prev;
|
||||||
|
--j;
|
||||||
|
}
|
||||||
|
arr[j] = key;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Simple SELECTION sort for small cases - not necessarily better */
|
/** Simple SELECTION sort for small cases - not necessarily better */
|
||||||
inline void sch_selection_sort(uint32_t* arr, int low, int high) {
|
inline void sch_selection_sort(uint32_t* arr, int low, int high) {
|
||||||
#pragma GCC unroll 2
|
#pragma GCC unroll 2
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user