Compare commits

...

4 Commits

Author SHA1 Message Date
Richard Thier
4c67501511 schwab: full branchless parts on partitioning now - speed gain but minimal... need like 5-6 of these 2025-05-09 03:51:37 +02:00
Richard Thier
b32c7540a2 schwab: goto-optimized - barely slower on my machine 2025-05-09 03:29:20 +02:00
Richard Thier
283783bf9b Revert "schwab: tried to help a bit with ILP - does not seem to help at all"
This reverts commit bf9c22f4ecd37039fe69142e9b72db6eb50b82a9.
2025-05-09 02:44:56 +02:00
Richard Thier
bf9c22f4ec schwab: tried to help a bit with ILP - does not seem to help at all 2025-05-09 02:44:44 +02:00

View File

@ -158,11 +158,22 @@ static inline int schwab_partition(
/* TODO: this should copy the whole element once elements are not just uint32s! */
uint32_t curr = arr[b3];
/* TODO: We can do "ILP-memcpy"s here:
*
* Key from b2->b3, value from b2->b3, key from b1->b2, value from b1... etc
* This is likely faster than calling a memcpy if we code this for not just uint32s!
*/
/* Fully branchless - see the commented-out branching alternative below */
int where = (curr < klo) ? 0 :
((curr < kmid) ? 1 : 2);
int target = (curr < klo) ? b0 :
((curr < kmid) ? b1 : b2);
arr[b3] = arr[b2];
arr[b2] = (where == 2) ? arr[b2] : arr[b1];
arr[b1] = (where == 1) ? arr[b1] : arr[b0];
++b2;
b1 += (where < 2);
b0 += (where < 1);
arr[target] = curr;
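/* Intended to be the same as this would have been.
 * NOTE(review): for where == 2 the branchless code above still executes
 * arr[b1] = arr[b0], while the final else-branch below never writes
 * arr[b1] - verify that the two versions really are equivalent: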
if(curr < klo) {
arr[b3] = arr[b2];
arr[b2] = arr[b1];
@ -176,12 +187,13 @@ static inline int schwab_partition(
arr[b3] = arr[b2];
arr[b2] = arr[b1];
arr[b1] = curr;
++b1; ++b2;
++b1;
} else {
arr[b3] = arr[b2];
arr[b2] = curr;
++b2;
}
++b2;
*/
}
/* [*] Swap the chosen pivot to the beginning of the last block */