diff --git a/ypsu.cpp b/ypsu.cpp index 0f778d7..0da4382 100644 --- a/ypsu.cpp +++ b/ypsu.cpp @@ -185,18 +185,16 @@ static inline void mormord_sort_impl(uint32_t *a, int n) noexcept { /* (because radix value 3 is not found in input) */ uint32_t prev = 0; uint32_t reali = 0; - #pragma GCC unroll 16 + #pragma GCC unroll 256 for(int i = 0; i < 256; ++i) { radics[i] += prev; - if(radics[i] != 0) { - real_radics[reali] = prev; - real_radics[reali + 1] = radics[i]; - reali += 2; - } + bool act =(radics[i] != 0); + real_radics[reali] = act ? prev : real_radics[reali]; + real_radics[reali + 1] = act ? radics[i] : real_radics[reali + 1]; + reali += 2 * act; prev = radics[i]; } - // Inplace swap - own ideas + some ideas based-on "famous" ct-swap (for branchless / more ILP): // void ct-swap(bool secret, uint64_t a[], uint64_t b[], size_t len) { // uint64_t mask = ~((uint64_t)secret - 1); // 1->111....111; 0->000....000