schwab: goto-optimized - barely slower on my machine
This commit is contained in:
parent
283783bf9b
commit
b32c7540a2
@ -158,11 +158,24 @@ static inline int schwab_partition(
|
|||||||
/* TODO: this should copy the whole element once elements are no longer plain uint32s! */
|
/* TODO: this should copy the whole element once elements are no longer plain uint32s! */
|
||||||
uint32_t curr = arr[b3];
|
uint32_t curr = arr[b3];
|
||||||
|
|
||||||
/* TODO: We can do "ILP-memcpy"s here:
|
/* Half-branchless and half-goto trickery */
|
||||||
*
|
int where = (curr < klo) ? 0 :
|
||||||
* Key from b2->b3, value from b2->b3, key from b1->b2, value from b1... etc
|
((curr < kmid) ? 1 : 2);
|
||||||
* This is likely faster than calling a memcpy if we code this for not just uint32s!
|
int target = (curr < klo) ? b0 :
|
||||||
*/
|
((curr < kmid) ? b1 : b2);
|
||||||
|
|
||||||
|
arr[b3] = arr[b2];
|
||||||
|
if(where == 2) goto auss;
|
||||||
|
arr[b2] = arr[b1];
|
||||||
|
if(where == 1) goto auss;
|
||||||
|
arr[b1] = arr[b0];
|
||||||
|
auss:
|
||||||
|
++b2;
|
||||||
|
b1 += (where < 2);
|
||||||
|
b0 += (where < 1);
|
||||||
|
arr[target] = curr;
|
||||||
|
|
||||||
|
/* Equivalent to the following plain if/else version:
|
||||||
if(curr < klo) {
|
if(curr < klo) {
|
||||||
arr[b3] = arr[b2];
|
arr[b3] = arr[b2];
|
||||||
arr[b2] = arr[b1];
|
arr[b2] = arr[b1];
|
||||||
@ -176,12 +189,13 @@ static inline int schwab_partition(
|
|||||||
arr[b3] = arr[b2];
|
arr[b3] = arr[b2];
|
||||||
arr[b2] = arr[b1];
|
arr[b2] = arr[b1];
|
||||||
arr[b1] = curr;
|
arr[b1] = curr;
|
||||||
++b1; ++b2;
|
++b1;
|
||||||
} else {
|
} else {
|
||||||
arr[b3] = arr[b2];
|
arr[b3] = arr[b2];
|
||||||
arr[b2] = curr;
|
arr[b2] = curr;
|
||||||
++b2;
|
|
||||||
}
|
}
|
||||||
|
++b2;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* [*] Swap the chosen pivot to the beginning of the last block */
|
/* [*] Swap the chosen pivot to the beginning of the last block */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user