diff --git a/schwab_sort.h b/schwab_sort.h index 9905e09..a11e0bb 100644 --- a/schwab_sort.h +++ b/schwab_sort.h @@ -17,7 +17,12 @@ /** Below this many elements we do insertion sort */ #ifndef SCHWAB_INSERTION_THRESHOLD #define SCHWAB_INSERTION_THRESHOLD 128 -#endif /* SCHWAB_DELTA_THRESHOLD */ +#endif /* SCHWAB_INSERTION_THRESHOLD */ + +/** When (high - low) is at or below this value we do selection sort instead of insertion sort */ +#ifndef SCHWAB_SELECTION_THRESHOLD +#define SCHWAB_SELECTION_THRESHOLD 16 +#endif /* SCHWAB_SELECTION_THRESHOLD */ typedef uint32_t sch_rand_state; @@ -65,7 +70,7 @@ inline void sch_insertion_sort(uint32_t *arr, int low, int high) { /** Simple insertion sort for small cases v2 - not necessarily better */ inline void sch_insertion_sort2(uint32_t* arr, int low, int high) { - for(size_t i = low + 1; i <= high; ++i) { + for(int i = low + 1; i <= high; ++i) { uint32_t key = arr[i]; /* Separate load and compare to expose ILP */ @@ -81,6 +86,23 @@ inline void sch_insertion_sort2(uint32_t* arr, int low, int high) { } } +/** Simple SELECTION sort for small cases - not necessarily better. Works on arr[low..high] with high inclusive: for each i, finds the index of the minimum of arr[i..high] and exchanges it with arr[i] via schwab_swap. */ +inline void sch_selection_sort(uint32_t* arr, int low, int high) { + #pragma GCC unroll 2 + for(int i = low; i < high; ++i) { + /* Min-search remaining array (strict '<' keeps the first minimum found) */ + int mini = i; + #pragma GCC unroll 4 + for(int j = i + 1; j < high + 1; ++j) { + if(arr[j] < arr[mini]) mini = j; + } + + if(mini != i) { + schwab_swap(&arr[i], &arr[mini]); + } + } +} + /** * 3-way partitioning, in middle all the pivot elements. * @@ -316,9 +338,14 @@ static inline void schwab_sort( high = plo - 1; } } else { - /* Just do an insertion sort instead */ - sch_insertion_sort(array, low, high); - return; + if(high - low > SCHWAB_SELECTION_THRESHOLD) { + /* Just do an insertion sort instead */ + sch_insertion_sort(array, low, high); + return; + } else { /* high - low <= SCHWAB_SELECTION_THRESHOLD: selection sort instead */ + sch_selection_sort(array, low, high); + return; + } } } }