diff --git a/data.inc b/data.inc index 895633f..85b44e7 100644 --- a/data.inc +++ b/data.inc @@ -1,6 +1,7 @@ int data[] = { - 8, 7, 2, 1, 0, 9, 6,1 - /*8, 7, 2, 1, 0, 9, 6, + 8, 7, 2, 1, 0, 9, 6,1, + 8, 7, 2, 1, 0, 9, 6, + 8, 7, 2, 1, 0, 9, 6, 8, 7, 2, 1, 0, 9, 6, 8, 7, 2, 1, 0, 9, 6, 8, 7, 2, 1, 0, 9, 6, @@ -9,5 +10,4 @@ int data[] = { 8, 7, 2, 1, 0, 9, 6, 8, 7, 2, 1, 0, 9, 6, 8, 7, 2, 1, 0, 9, 6, - 8, 7, 2, 1, 0, 9, 6,*/ }; diff --git a/schwab_sort.h b/schwab_sort.h index f287883..f026d40 100644 --- a/schwab_sort.h +++ b/schwab_sort.h @@ -32,7 +32,7 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) { /** * 4-way partitioning * - * Expects: arr[plo] <= arr[pmid] <= arr[phi] + * Expects: arr[plo] <= kmid <= arr[phi] * Results: arr[low..plo - 1] <= arr[plo..pmid - 1] <= arr[pmid..phi - 1] <= arr[phi.. high] * * Also: Adding together lengths of all results arrays shrinks by 1 compared to start arr. @@ -42,7 +42,8 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) { * @param low Inclusive smallest index. * @param high Inclusive highest index. * @param plo IN-OUT: input low pivot, output index until elements <= low pivot. - * @param pmid IN-OUT: input mid pivot, output index until elements <= mid pivot. + * @param kmid IN: The mid splitting value (like a pivot value, but can be imaginary/nonexistent) + * @param pmid OUT: output index until elements <= mid pivot. * @param phi IN-OUT: input high pivot, output index until elements <= high pivot.
*/ static inline void schwab_partition( @@ -50,9 +51,14 @@ static inline void schwab_partition( int low, int high, int *plo, + uint32_t kmid, int *pmid, int *phi) { + /* Keys only - no element copy is made here */ + uint32_t klo = arr[*plo]; + uint32_t khi = arr[*phi]; + /* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */ uint32_t tmphi = arr[*phi]; arr[*phi] = arr[high]; @@ -61,11 +67,6 @@ static inline void schwab_partition( /* Aren't inclusive end indices of 4 "blocks" - b0 is smallest vals */ int b0 = low, b1 = low, b2 = low, b3 = low; - /* Keys only - no element copy is made here */ - uint32_t klo = arr[*plo]; - uint32_t kmid = arr[*pmid]; - uint32_t khi = arr[*phi]; - while(b3 < high) { /* This I moved to be first for hot code path for constant / smallrange */ if(arr[b3] >= khi) { @@ -106,8 +107,9 @@ static inline void schwab_partition( /* This way we can return bigger index and by that */ /* this always removes an element per run at least */ tmphi = arr[b2]; - arr[b2++] = arr[high]; + arr[b2] = arr[high]; arr[high] = tmphi; + ++b2; /* Handle output vars as per doc comment */ *plo = b0; @@ -115,7 +117,7 @@ static inline void schwab_partition( *phi = b2; /* Because of: [*] */ } -/** Always at most log(n) space needing 4-way quicksort-like alg */ +/** Swabic-sort is somewhat similar to quicksort, but 4-way and tricky */ static inline void schwab_sort( uint32_t *array, int low, @@ -126,70 +128,50 @@ static inline void schwab_sort( while(low < high) { int r0 = schwab_pick_pivot(state, (high + 1) - low) + low; int r1 = schwab_pick_pivot(state, (high + 1) - low) + low; - - uint32_t k0 = array[r0]; - uint32_t k1 = array[r1]; + uint32_t klo = array[r0]; + uint32_t khi = array[r1]; int plo = r0; int phi = r1; - if(k0 > k1) { + if(klo > khi) { + uint32_t ktmp = klo; + klo = khi; + khi = ktmp; + plo = r1; phi = r0; - uint32_t tmp = k0; - k0 = k1; - k1 = tmp; } - int r2 = schwab_pick_pivot(state, (phi + 1) - plo) + plo; - uint32_t k2 = array[r2]; - int
pmid = r2; - if(k2 < k0) { - int tmp = plo; - plo = pmid; - pmid = tmp; - } else if(k2 > k1) { - int tmp = phi; - phi = pmid; - pmid = tmp; - } + uint32_t kmid = klo + (khi - klo) / 2; - schwab_partition(array, low, high, &plo, &pmid, &phi); + int pmid; + schwab_partition(array, low, high, &plo, kmid, &pmid, &phi); /* See where NOT to recurse to avoid worst case stack depth */ /* Rem.: These might be "not real" length but we only use them to comparisons */ /* REM.: The "real" lengths might be off-by-one but these are FASTER! */ int lolen = plo - low; - int lomidlen = pmid - plo; - int himidlen = phi - pmid; - int hilen = high -phi; - int lomax = (lolen > lomidlen) ? lolen : lomidlen; - int himax = (hilen > himidlen) ? hilen : himidlen; + int hilen = high - phi; /* Rewrite loop for worst subtask goal and recurse others! */ /* Let the branch predictor try to predict input data path */ - if(lomax < himax) { + /* Rem.: Best would be to check for the biggest of all 4 blocks */ + /* But that would complicate the code above this point! */ + /* Rem.: The order of operations tries to be as cache-friendly */ + /* as possible, but the loop-bound updates had to go last */ + if(lolen < hilen) { schwab_sort(array, low, plo - 1, state); schwab_sort(array, plo, pmid - 1, state); - if(hilen > himidlen) { - schwab_sort(array, pmid, phi - 1, state); - low = phi; - /* high = high; */ - } else { - schwab_sort(array, phi, high, state); - low = pmid; - high = phi - 1; - } - } else { schwab_sort(array, pmid, phi - 1, state); + + low = phi; + /* high = high; */ + } else { schwab_sort(array, phi, high, state); - if(lolen < lomidlen) { - schwab_sort(array, low, plo - 1, state); - low = plo; - high = pmid - 1; - } else { - schwab_sort(array, plo, pmid - 1, state); - /* low = low; */ - high = plo - 1; - } + schwab_sort(array, pmid, phi - 1, state); + schwab_sort(array, plo, pmid - 1, state); + + /* low = low; */ + high = plo - 1; } } }