schwab: fixed various bugs; now passes all magyarsort tests up to 20k elements, but is very slow on constant data for some reason - needs checking
This commit is contained in:
parent
6fcf79bee3
commit
c06f02bc94
6
data.inc
6
data.inc
@ -1,6 +1,7 @@
|
|||||||
int data[] = {
|
int data[] = {
|
||||||
8, 7, 2, 1, 0, 9, 6,1
|
8, 7, 2, 1, 0, 9, 6,1,
|
||||||
/*8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
@ -9,5 +10,4 @@ int data[] = {
|
|||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,*/
|
|
||||||
};
|
};
|
||||||
|
@ -32,7 +32,7 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
|
|||||||
/**
|
/**
|
||||||
* 4-way partitioning
|
* 4-way partitioning
|
||||||
*
|
*
|
||||||
* Expects: arr[plo] <= arr[pmid] <= arr[phi]
|
* Expects: arr[plo] <= kmid <= arr[phi]
|
||||||
* Results: arr[low..plo - 1] <= arr[plo..pmid - 1] <= arr[pmid..phi - 1] <= arr[phi.. high]
|
* Results: arr[low..plo - 1] <= arr[plo..pmid - 1] <= arr[pmid..phi - 1] <= arr[phi.. high]
|
||||||
*
|
*
|
||||||
* Also: Adding together lengths of all results arrays shrinks by 1 compared to start arr.
|
* Also: Adding together lengths of all results arrays shrinks by 1 compared to start arr.
|
||||||
@ -42,7 +42,8 @@ static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
|
|||||||
* @param low Inclusive smallest index.
|
* @param low Inclusive smallest index.
|
||||||
* @param high Inclusive highest index.
|
* @param high Inclusive highest index.
|
||||||
* @param plo IN-OUT: input low pivot, output index until elements <= low pivot.
|
* @param plo IN-OUT: input low pivot, output index until elements <= low pivot.
|
||||||
* @param pmid IN-OUT: input mid pivot, output index until elements <= mid pivot.
|
* @param kmid IN: The mid splitting value (like a pivot value, but it need not exist in the array)
|
||||||
|
* @param pmid OUT: output index until elements <= mid pivot.
|
||||||
* @param phi IN-OUT: input high pivot, output index until elements <= high pivot.
|
* @param phi IN-OUT: input high pivot, output index until elements <= high pivot.
|
||||||
*/
|
*/
|
||||||
static inline void schwab_partition(
|
static inline void schwab_partition(
|
||||||
@ -50,9 +51,14 @@ static inline void schwab_partition(
|
|||||||
int low,
|
int low,
|
||||||
int high,
|
int high,
|
||||||
int *plo,
|
int *plo,
|
||||||
|
uint32_t kmid,
|
||||||
int *pmid,
|
int *pmid,
|
||||||
int *phi) {
|
int *phi) {
|
||||||
|
|
||||||
|
/* Keys only - no element copy is made here */
|
||||||
|
uint32_t klo = arr[*plo];
|
||||||
|
uint32_t khi = arr[*phi];
|
||||||
|
|
||||||
/* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */
|
/* [*] Swapping arr[phi]<->arr[high] ensures stop condition later */
|
||||||
uint32_t tmphi = arr[*phi];
|
uint32_t tmphi = arr[*phi];
|
||||||
arr[*phi] = arr[high];
|
arr[*phi] = arr[high];
|
||||||
@ -61,11 +67,6 @@ static inline void schwab_partition(
|
|||||||
/* Aren't inclusive end indices of 4 "blocks" - b0 is smallest vals */
|
/* Aren't inclusive end indices of 4 "blocks" - b0 is smallest vals */
|
||||||
int b0 = low, b1 = low, b2 = low, b3 = low;
|
int b0 = low, b1 = low, b2 = low, b3 = low;
|
||||||
|
|
||||||
/* Keys only - no element copy is made here */
|
|
||||||
uint32_t klo = arr[*plo];
|
|
||||||
uint32_t kmid = arr[*pmid];
|
|
||||||
uint32_t khi = arr[*phi];
|
|
||||||
|
|
||||||
while(b3 < high) {
|
while(b3 < high) {
|
||||||
/* This I moved to be first for hot code path for constant / smallrange */
|
/* This I moved to be first for hot code path for constant / smallrange */
|
||||||
if(arr[b3] >= khi) {
|
if(arr[b3] >= khi) {
|
||||||
@ -106,8 +107,9 @@ static inline void schwab_partition(
|
|||||||
/* This way we can return bigger index and by that */
|
/* This way we can return bigger index and by that */
|
||||||
/* this always removes an element per run at least */
|
/* this always removes an element per run at least */
|
||||||
tmphi = arr[b2];
|
tmphi = arr[b2];
|
||||||
arr[b2++] = arr[high];
|
arr[b2] = arr[high];
|
||||||
arr[high] = tmphi;
|
arr[high] = tmphi;
|
||||||
|
++b2;
|
||||||
|
|
||||||
/* Handle output vars as per doc comment */
|
/* Handle output vars as per doc comment */
|
||||||
*plo = b0;
|
*plo = b0;
|
||||||
@ -115,7 +117,7 @@ static inline void schwab_partition(
|
|||||||
*phi = b2; /* Because of: [*] */
|
*phi = b2; /* Because of: [*] */
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Always at most log(n) space needing 4-way quicksort-like alg */
|
/** Swabic-sort: it's somewhat similar to quicksort, but 4-way and tricky */
|
||||||
static inline void schwab_sort(
|
static inline void schwab_sort(
|
||||||
uint32_t *array,
|
uint32_t *array,
|
||||||
int low,
|
int low,
|
||||||
@ -126,70 +128,50 @@ static inline void schwab_sort(
|
|||||||
while(low < high) {
|
while(low < high) {
|
||||||
int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
||||||
int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
||||||
|
uint32_t klo = array[r0];
|
||||||
uint32_t k0 = array[r0];
|
uint32_t khi = array[r1];
|
||||||
uint32_t k1 = array[r1];
|
|
||||||
int plo = r0;
|
int plo = r0;
|
||||||
int phi = r1;
|
int phi = r1;
|
||||||
if(k0 > k1) {
|
if(klo > khi) {
|
||||||
|
uint32_t ktmp = klo;
|
||||||
|
klo = khi;
|
||||||
|
khi = ktmp;
|
||||||
|
|
||||||
plo = r1;
|
plo = r1;
|
||||||
phi = r0;
|
phi = r0;
|
||||||
uint32_t tmp = k0;
|
|
||||||
k0 = k1;
|
|
||||||
k1 = tmp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int r2 = schwab_pick_pivot(state, (phi + 1) - plo) + plo;
|
uint32_t kmid = klo + (khi - klo) / 2;
|
||||||
uint32_t k2 = array[r2];
|
|
||||||
int pmid = r2;
|
|
||||||
if(k2 < k0) {
|
|
||||||
int tmp = plo;
|
|
||||||
plo = pmid;
|
|
||||||
pmid = tmp;
|
|
||||||
} else if(k2 > k1) {
|
|
||||||
int tmp = phi;
|
|
||||||
phi = pmid;
|
|
||||||
pmid = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
schwab_partition(array, low, high, &plo, &pmid, &phi);
|
int pmid;
|
||||||
|
schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
|
||||||
|
|
||||||
/* See where NOT to recurse to avoid worst case stack depth */
|
/* See where NOT to recurse to avoid worst case stack depth */
|
||||||
/* Rem.: These might be "not real" length but we only use them to comparisons */
|
/* Rem.: These might be "not real" length but we only use them to comparisons */
|
||||||
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
|
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
|
||||||
int lolen = plo - low;
|
int lolen = plo - low;
|
||||||
int lomidlen = pmid - plo;
|
int hilen = high - phi;
|
||||||
int himidlen = phi - pmid;
|
|
||||||
int hilen = high -phi;
|
|
||||||
int lomax = (lolen > lomidlen) ? lolen : lomidlen;
|
|
||||||
int himax = (hilen > himidlen) ? hilen : himidlen;
|
|
||||||
|
|
||||||
/* Rewrite loop for worst subtask goal and recurse others! */
|
/* Rewrite loop for worst subtask goal and recurse others! */
|
||||||
/* Let the branch predictor try to predict input data path */
|
/* Let the branch predictor try to predict input data path */
|
||||||
if(lomax < himax) {
|
/* Rem.: Best would be to check for the biggest of all 4 blocks */
|
||||||
|
/* But that would complicate the code above this point! */
|
||||||
|
/* Rem.: Order of operations tries to be as cache-friendly as */
|
||||||
|
/* possible, but the loop variable changes had to go at the end */
|
||||||
|
if(lolen < hilen) {
|
||||||
schwab_sort(array, low, plo - 1, state);
|
schwab_sort(array, low, plo - 1, state);
|
||||||
schwab_sort(array, plo, pmid - 1, state);
|
schwab_sort(array, plo, pmid - 1, state);
|
||||||
if(hilen > himidlen) {
|
|
||||||
schwab_sort(array, pmid, phi - 1, state);
|
|
||||||
low = phi;
|
|
||||||
/* high = high; */
|
|
||||||
} else {
|
|
||||||
schwab_sort(array, phi, high, state);
|
|
||||||
low = pmid;
|
|
||||||
high = phi - 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
schwab_sort(array, pmid, phi - 1, state);
|
schwab_sort(array, pmid, phi - 1, state);
|
||||||
|
|
||||||
|
low = phi;
|
||||||
|
/* high = high; */
|
||||||
|
} else {
|
||||||
schwab_sort(array, phi, high, state);
|
schwab_sort(array, phi, high, state);
|
||||||
if(lolen < lomidlen) {
|
schwab_sort(array, pmid, phi - 1, state);
|
||||||
schwab_sort(array, low, plo - 1, state);
|
schwab_sort(array, plo, pmid - 1, state);
|
||||||
low = plo;
|
|
||||||
high = pmid - 1;
|
/* low = low; */
|
||||||
} else {
|
high = plo - 1;
|
||||||
schwab_sort(array, plo, pmid - 1, state);
|
|
||||||
/* low = low; */
|
|
||||||
high = plo - 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user