diff --git a/ypsu.cpp b/ypsu.cpp index 0da4382..0f778d7 100644 --- a/ypsu.cpp +++ b/ypsu.cpp @@ -185,16 +185,18 @@ static inline void mormord_sort_impl(uint32_t *a, int n) noexcept { /* (because radix value 3 is not found in input) */ uint32_t prev = 0; uint32_t reali = 0; - #pragma GCC unroll 256 + #pragma GCC unroll 16 for(int i = 0; i < 256; ++i) { radics[i] += prev; - bool act =(radics[i] != 0); - real_radics[reali] = act ? prev : real_radics[reali]; - real_radics[reali + 1] = act ? radics[i] : real_radics[reali + 1]; - reali += 2 * act; + if(radics[i] != 0) { + real_radics[reali] = prev; + real_radics[reali + 1] = radics[i]; + reali += 2; + } prev = radics[i]; } + // Inplace swap - own ideas + some ideas based-on "famous" ct-swap (for branchless / more ILP): // void ct-swap(bool secret, uint64_t a[], uint64_t b[], size_t len) { // uint64_t mask = ~((uint64_t)secret - 1); // 1->111....111; 0->000....000