Revert "trying more ILP in spsort - not much success and will be reverted"
This reverts commit c7e794b7ea1e10f25e64ba29f740890f3aa93b0a.
This commit is contained in:
parent
c7e794b7ea
commit
dac044fe91
@ -133,19 +133,11 @@ inline uint32_t internal_mid(uint32_t low, uint32_t high) {
|
||||
inline void spsort(uint32_t *t, int n, int m = 32);
|
||||
|
||||
/** Helper function that puts elements higher than mid to the top of the array and lower to the bottom. Returns the number of bottom elements. */
|
||||
inline int internal_array_separate(uint32_t *t, int n, uint32_t mid, int bulk_xchg = 32) {
|
||||
inline int internal_array_separate(uint32_t *t, int n, uint32_t mid) {
|
||||
if(n > 0) {
|
||||
// Two heads that also read & write (both)
|
||||
int left = 0;
|
||||
int right = n - 1;
|
||||
|
||||
// These are needed for more ILP so that we can do the xchg operations in bulk
|
||||
// and without data dependencies just do it in an unrolled loop from time to time!
|
||||
std::vector<int> xchg_left(0);
|
||||
std::vector<int> xchg_right(0);
|
||||
xchg_left.reserve(bulk_xchg);
|
||||
xchg_right.reserve(bulk_xchg);
|
||||
|
||||
while(left < right) {
|
||||
// Step over values from the left that are already well positioned
|
||||
while((left < right) && (t[left] < mid)) {
|
||||
@ -160,30 +152,12 @@ inline int internal_array_separate(uint32_t *t, int n, uint32_t mid, int bulk_xc
|
||||
// Extra check needed for edge-case!
|
||||
if(left < right) {
|
||||
// Both in wrong location - xchg them!
|
||||
// instead of doing it right here, collect them up! (*)
|
||||
xchg_left.push_back(left);
|
||||
xchg_right.push_back(right);
|
||||
auto tmp = t[right];
|
||||
t[right] = t[left];
|
||||
t[left] = tmp;
|
||||
++left;
|
||||
--right;
|
||||
}
|
||||
|
||||
// See if we can do some bulk-exchange now (*)
|
||||
// This loop the compiler should more easily unroll
|
||||
// and the CPU should be able to schedule ILP-wise!
|
||||
if(xchg_left.size() <= bulk_xchg) {
|
||||
for(int i = 0; i < xchg_left.size(); ++i) {
|
||||
auto tmp = t[xchg_left[i]];
|
||||
t[xchg_left[i]] = t[xchg_right[i]];
|
||||
t[xchg_right[i]] = tmp;
|
||||
}
|
||||
xchg_left.resize(0);
|
||||
xchg_right.resize(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Finish the remaining bulk exchanges (*)
|
||||
for(int i = 0; i < xchg_left.size(); ++i) {
|
||||
auto tmp = t[xchg_left[i]];
|
||||
t[xchg_left[i]] = t[xchg_right[i]];
|
||||
t[xchg_right[i]] = tmp;
|
||||
}
|
||||
|
||||
// Edge case: increment if a single element remains in the middle at the end
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user