From 147ca60672c8f3ae8c08cbce939ab75625e11507 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Fri, 9 May 2025 04:49:31 +0200 Subject: [PATCH] schwab insertion - but buggy from some previous at n=20 rand --- schwab_sort.h | 120 +++++++++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 46 deletions(-) diff --git a/schwab_sort.h b/schwab_sort.h index 038332e..e36740c 100644 --- a/schwab_sort.h +++ b/schwab_sort.h @@ -14,6 +14,11 @@ #define SCHWAB_DELTA_THRESHOLD 32 #endif /* SCHWAB_DELTA_THRESHOLD */ +/** Below this many elements we do insertion sort */ +#ifndef SCHWAB_INSERTION_THRESHOLD +#define SCHWAB_INSERTION_THRESHOLD 4 +#endif /* SCHWAB_INSERTION_THRESHOLD */ + typedef uint32_t sch_rand_state; /** Create rand state for schwab_sort using a seed - can give 0 if uninterested */ @@ -41,6 +46,22 @@ static inline void schwab_swap(uint32_t *a, uint32_t *b) { *b = t; } +/** Simple insertion sort for small cases: sorts arr[low..high] (both ends inclusive) ascending and in place, never reading or writing outside that range; a no-op when high <= low */ +static inline void sch_insertion_sort(uint32_t *arr, int low, int high) { + for (int i = low + 1; i <= high; ++i) { + uint32_t key = arr[i]; + int j = i; + + /* Move elements of arr[low..i-1] that are greater than key */ + /* to one position ahead of their current position; stop at low so the prefix below the range is untouched */ + while (j > low && arr[j - 1] > key) { + arr[j] = arr[j - 1]; + --j; + } + arr[j] = key; + } +} + /** * 3-way partitioning, in middle all the pivot elements. 
* @@ -221,57 +242,64 @@ static inline void schwab_sort( sch_rand_state *state) { /* Loop handles longest sub-sort-task which ensused log tree depth */ + /* Loop also handles start condition */ while(low < high) { - int r0 = schwab_pick_pivot(state, (high + 1) - low) + low; - int r1 = schwab_pick_pivot(state, (high + 1) - low) + low; - uint32_t klo = array[r0]; - uint32_t khi = array[r1]; - int plo = r0; - int phi = r1; - if(klo > khi) { - uint32_t ktmp = klo; - klo = khi; - khi = ktmp; + if(1 /*high - low > SCHWAB_INSERTION_THRESHOLD*/) { + int r0 = schwab_pick_pivot(state, (high + 1) - low) + low; + int r1 = schwab_pick_pivot(state, (high + 1) - low) + low; + uint32_t klo = array[r0]; + uint32_t khi = array[r1]; + int plo = r0; + int phi = r1; + if(klo > khi) { + uint32_t ktmp = klo; + klo = khi; + khi = ktmp; - plo = r1; - phi = r0; - } - - uint32_t kmid = klo + (khi - klo) / 2; - - int pmid; - int needmid = schwab_partition(array, low, high, &plo, kmid, &pmid, &phi); - - /* See where NOT to recurse to avoid worst case stack depth */ - /* Rem.: These might be "not real" length but we only use them to comparisons */ - /* REM.: The "real" lengths might be off-by-one but these are FASTER! */ - int lolen = plo - low; - int hilen = high - phi; - - /* Rewrite loop for worst subtask goal and recurse others! */ - /* Let the branch predictor try to predict input data path */ - /* Rem.: Best would be to check for biggest in all 4 block */ - /* But that would complicate codes above this point! 
*/ - /* Rem.: Order of operations try to be a cache-friendly as */ - /* possible, but had to put loops changes to the end */ - if(lolen < hilen) { - schwab_sort(array, low, plo - 1, state); - if(needmid) { - schwab_sort(array, plo, pmid - 1, state); - schwab_sort(array, pmid, phi - 1, state); + plo = r1; + phi = r0; } - low = phi; - /* high = high; */ + uint32_t kmid = klo + (khi - klo) / 2; + + int pmid; + int needmid = schwab_partition(array, low, high, &plo, kmid, &pmid, &phi); + + /* See where NOT to recurse to avoid worst case stack depth */ + /* Rem.: These might be "not real" length but we only use them to comparisons */ + /* REM.: The "real" lengths might be off-by-one but these are FASTER! */ + int lolen = plo - low; + int hilen = high - phi; + + /* Rewrite loop for worst subtask goal and recurse others! */ + /* Let the branch predictor try to predict input data path */ + /* Rem.: Best would be to check for biggest in all 4 block */ + /* But that would complicate codes above this point! */ + /* Rem.: Order of operations try to be a cache-friendly as */ + /* possible, but had to put loops changes to the end */ + if(lolen < hilen) { + schwab_sort(array, low, plo - 1, state); + if(needmid) { + schwab_sort(array, plo, pmid - 1, state); + schwab_sort(array, pmid, phi - 1, state); + } + + low = phi; + /* high = high; */ + } else { + schwab_sort(array, phi, high, state); + if(needmid) { + schwab_sort(array, pmid, phi - 1, state); + schwab_sort(array, plo, pmid - 1, state); + } + + /* low = low; */ + high = plo - 1; + } } else { - schwab_sort(array, phi, high, state); - if(needmid) { - schwab_sort(array, pmid, phi - 1, state); - schwab_sort(array, plo, pmid - 1, state); - } - - /* low = low; */ - high = plo - 1; + /* Just do an insertion sort instead */ + sch_insertion_sort(array, low, high); + return; } } }