schwab: full branchless parts on partitioning now - speed gain but minimal... need like 5-6 of these

This commit is contained in:
Richard Thier 2025-05-09 03:51:37 +02:00
parent b32c7540a2
commit 4c67501511

View File

@@ -158,18 +158,16 @@ static inline int schwab_partition(
/* TODO: should be copy of whole element when not just uint32s! */ /* TODO: should be copy of whole element when not just uint32s! */
uint32_t curr = arr[b3]; uint32_t curr = arr[b3];
/* Half-branchless and half-goto trickery */ /* Full branchless - see below for branch alternative */
int where = (curr < klo) ? 0 : int where = (curr < klo) ? 0 :
((curr < kmid) ? 1 : 2); ((curr < kmid) ? 1 : 2);
int target = (curr < klo) ? b0 : int target = (curr < klo) ? b0 :
((curr < kmid) ? b1 : b2); ((curr < kmid) ? b1 : b2);
arr[b3] = arr[b2]; arr[b3] = arr[b2];
if(where == 2) goto auss; arr[b2] = (where == 2) ? arr[b2] : arr[b1];
arr[b2] = arr[b1]; arr[b1] = (where == 1) ? arr[b1] : arr[b0];
if(where == 1) goto auss;
arr[b1] = arr[b0];
auss:
++b2; ++b2;
b1 += (where < 2); b1 += (where < 2);
b0 += (where < 1); b0 += (where < 1);