schwab: fixed various bugs; now passes all of magyarsort's tests up to 20k elements, but is very slow on constant data for some reason - needs checking
This commit is contained in:
parent
6fcf79bee3
commit
c06f02bc94
6
data.inc
6
data.inc
@ -1,6 +1,7 @@
|
||||
int data[] = {
|
||||
8, 7, 2, 1, 0, 9, 6,1
|
||||
/*8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,1,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
@ -9,5 +10,4 @@ int data[] = {
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,
|
||||
8, 7, 2, 1, 0, 9, 6,*/
|
||||
};
|
||||
|
@ -32,7 +32,7 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
|
||||
/**
|
||||
* 4-way partitioning
|
||||
*
|
||||
* Expects: arr[plo] <= arr[pmid] <= arr[phi]
|
||||
* Expects: arr[plo] <= kmid <= arr[phi]
|
||||
* Results: arr[low..plo - 1] <= arr[plo..pmid - 1] <= arr[pmid..phi - 1] <= arr[phi.. high]
|
||||
*
|
||||
* Also: The summed lengths of all result arrays are 1 less than the length of the start arr.
|
||||
@ -42,7 +42,8 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
|
||||
* @param low Inclusive smallest index.
|
||||
* @param high Inclusive highest index.
|
||||
* @param plo IN-OUT: input low pivot, output index until elements <= low pivot.
|
||||
* @param pmid IN-OUT: input mid pivot, output index until elements <= mid pivot.
|
||||
* @param kmid IN: The mid splitting value (like a pivot value, but it can be imaginary, i.e. not present in the array)
|
||||
* @param pmid OUT: output index until elements <= mid pivot.
|
||||
* @param phi IN-OUT: input high pivot, output index until elements <= high pivot.
|
||||
*/
|
||||
static inline void schwab_partition(
|
||||
@ -50,9 +51,14 @@ static inline void schwab_partition(
|
||||
int low,
|
||||
int high,
|
||||
int *plo,
|
||||
uint32_t kmid,
|
||||
int *pmid,
|
||||
int *phi) {
|
||||
|
||||
/* Keys only - no element copy is made here */
|
||||
uint32_t klo = arr[*plo];
|
||||
uint32_t khi = arr[*phi];
|
||||
|
||||
/* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */
|
||||
uint32_t tmphi = arr[*phi];
|
||||
arr[*phi] = arr[high];
|
||||
@ -61,11 +67,6 @@ static inline void schwab_partition(
|
||||
/* Non-inclusive end indices of the 4 "blocks" - b0 belongs to the smallest vals */
|
||||
int b0 = low, b1 = low, b2 = low, b3 = low;
|
||||
|
||||
/* Keys only - no element copy is made here */
|
||||
uint32_t klo = arr[*plo];
|
||||
uint32_t kmid = arr[*pmid];
|
||||
uint32_t khi = arr[*phi];
|
||||
|
||||
while(b3 < high) {
|
||||
/* Moved this check first so it is the hot code path for constant / small-range data */
|
||||
if(arr[b3] >= khi) {
|
||||
@ -106,8 +107,9 @@ static inline void schwab_partition(
|
||||
/* This way we can return bigger index and by that */
|
||||
/* this always removes at least one element per run */
|
||||
tmphi = arr[b2];
|
||||
arr[b2++] = arr[high];
|
||||
arr[b2] = arr[high];
|
||||
arr[high] = tmphi;
|
||||
++b2;
|
||||
|
||||
/* Handle output vars as per doc comment */
|
||||
*plo = b0;
|
||||
@ -115,7 +117,7 @@ static inline void schwab_partition(
|
||||
*phi = b2; /* Because of: [*] */
|
||||
}
|
||||
|
||||
/** Always at most log(n) space needing 4-way quicksort-like alg */
|
||||
/** Swabic-sort: it's somewhat similar to quicksort, but 4-way and tricky */
|
||||
static inline void schwab_sort(
|
||||
uint32_t *array,
|
||||
int low,
|
||||
@ -126,72 +128,52 @@ static inline void schwab_sort(
|
||||
while(low < high) {
|
||||
int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
||||
int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
||||
|
||||
uint32_t k0 = array[r0];
|
||||
uint32_t k1 = array[r1];
|
||||
uint32_t klo = array[r0];
|
||||
uint32_t khi = array[r1];
|
||||
int plo = r0;
|
||||
int phi = r1;
|
||||
if(k0 > k1) {
|
||||
if(klo > khi) {
|
||||
uint32_t ktmp = klo;
|
||||
klo = khi;
|
||||
khi = ktmp;
|
||||
|
||||
plo = r1;
|
||||
phi = r0;
|
||||
uint32_t tmp = k0;
|
||||
k0 = k1;
|
||||
k1 = tmp;
|
||||
}
|
||||
|
||||
int r2 = schwab_pick_pivot(state, (phi + 1) - plo) + plo;
|
||||
uint32_t k2 = array[r2];
|
||||
int pmid = r2;
|
||||
if(k2 < k0) {
|
||||
int tmp = plo;
|
||||
plo = pmid;
|
||||
pmid = tmp;
|
||||
} else if(k2 > k1) {
|
||||
int tmp = phi;
|
||||
phi = pmid;
|
||||
pmid = tmp;
|
||||
}
|
||||
uint32_t kmid = klo + (khi - klo) / 2;
|
||||
|
||||
schwab_partition(array, low, high, &plo, &pmid, &phi);
|
||||
int pmid;
|
||||
schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
|
||||
|
||||
/* See where NOT to recurse to avoid worst case stack depth */
|
||||
/* Rem.: These might be "not real" lengths, but we only use them for comparisons */
|
||||
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
|
||||
int lolen = plo - low;
|
||||
int lomidlen = pmid - plo;
|
||||
int himidlen = phi - pmid;
|
||||
int hilen = high - phi;
|
||||
int lomax = (lolen > lomidlen) ? lolen : lomidlen;
|
||||
int himax = (hilen > himidlen) ? hilen : himidlen;
|
||||
|
||||
/* Rewrite loop for worst subtask goal and recurse others! */
|
||||
/* Let the branch predictor try to predict input data path */
|
||||
if(lomax < himax) {
|
||||
/* Rem.: Best would be to check for the biggest of all 4 blocks */
|
||||
/* But that would complicate the code above this point! */
|
||||
/* Rem.: The order of operations tries to be as cache-friendly as */
|
||||
/* possible, but the loop variable changes had to be put at the end */
|
||||
if(lolen < hilen) {
|
||||
schwab_sort(array, low, plo - 1, state);
|
||||
schwab_sort(array, plo, pmid - 1, state);
|
||||
if(hilen > himidlen) {
|
||||
schwab_sort(array, pmid, phi - 1, state);
|
||||
|
||||
low = phi;
|
||||
/* high = high; */
|
||||
} else {
|
||||
schwab_sort(array, phi, high, state);
|
||||
low = pmid;
|
||||
high = phi - 1;
|
||||
}
|
||||
} else {
|
||||
schwab_sort(array, pmid, phi - 1, state);
|
||||
schwab_sort(array, phi, high, state);
|
||||
if(lolen < lomidlen) {
|
||||
schwab_sort(array, low, plo - 1, state);
|
||||
low = plo;
|
||||
high = pmid - 1;
|
||||
} else {
|
||||
schwab_sort(array, plo, pmid - 1, state);
|
||||
|
||||
/* low = low; */
|
||||
high = plo - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* SWAB_SORT_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user