diff --git a/magyarsort.h b/magyarsort.h index f4d68b5..4aa23e8 100644 --- a/magyarsort.h +++ b/magyarsort.h @@ -101,15 +101,6 @@ namespace MagyarSort { inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {} }; - template - static inline void count_occurence_magic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) { - if constexpr (DIGIT >= 0) { - // Parents run first so template recursion runs DIGIT=0 first... - count_occurence_magic(arr, i, magicsOut); - ++magicsOut[getDigit(arr[i]) + DIGIT_RANGE * DIGIT]; - } - } - /** ARR_END must be an (STEP * k) */ template struct PrefetchMagic : public PrefetchMagic<(ARR_END - STEP), STEP, ARR_T, R_OR_W, LOCALITY> { @@ -133,79 +124,79 @@ namespace MagyarSort { //__builtin_prefetch(&arr[i + (1 * 16)], 0, 2); // r, L2 or L3 cache __builtin_prefetch(&arr[i + (1 * 16)]); // Creates no object, struct is empty - count_occurence_magic(arr, i, magicsOut); - count_occurence_magic(arr, i + 1, magicsOut); - count_occurence_magic(arr, i + 2, magicsOut); - count_occurence_magic(arr, i + 3, magicsOut); - count_occurence_magic(arr, i + 4, magicsOut); - count_occurence_magic(arr, i + 5, magicsOut); - count_occurence_magic(arr, i + 6, magicsOut); - count_occurence_magic(arr, i + 7, magicsOut); - count_occurence_magic(arr, i + 8, magicsOut); - count_occurence_magic(arr, i + 9, magicsOut); - count_occurence_magic(arr, i + 10, magicsOut); - count_occurence_magic(arr, i + 11, magicsOut); - count_occurence_magic(arr, i + 12, magicsOut); - count_occurence_magic(arr, i + 13, magicsOut); - count_occurence_magic(arr, i + 14, magicsOut); - count_occurence_magic(arr, i + 15, magicsOut); + OccurenceMagic(arr, i, magicsOut); + OccurenceMagic(arr, i + 1, magicsOut); + OccurenceMagic(arr, i + 2, magicsOut); + OccurenceMagic(arr, i + 3, magicsOut); + OccurenceMagic(arr, i + 4, magicsOut); + OccurenceMagic(arr, i + 5, magicsOut); + OccurenceMagic(arr, i + 6, magicsOut); + OccurenceMagic(arr, i + 7, magicsOut); + OccurenceMagic(arr, i + 8, magicsOut); + OccurenceMagic(arr, i + 9, magicsOut); + OccurenceMagic(arr, i + 10, magicsOut); + OccurenceMagic(arr, i + 11, magicsOut); + OccurenceMagic(arr, i + 12, magicsOut); + OccurenceMagic(arr, i + 13, magicsOut); + OccurenceMagic(arr, i + 14, magicsOut); + OccurenceMagic(arr, i + 15, magicsOut); // Prefetch for read level-1 cache __builtin_prefetch(&arr[i + (2 * 16)]); - count_occurence_magic(arr, i + 16, magicsOut); - count_occurence_magic(arr, i + 17, magicsOut); - count_occurence_magic(arr, i + 18, magicsOut); - count_occurence_magic(arr, i + 19, magicsOut); - count_occurence_magic(arr, i + 20, magicsOut); - count_occurence_magic(arr, i + 21, magicsOut); - count_occurence_magic(arr, i + 22, magicsOut); - count_occurence_magic(arr, i + 23, magicsOut); - count_occurence_magic(arr, i + 24, magicsOut); - count_occurence_magic(arr, i + 25, magicsOut); - count_occurence_magic(arr, i + 26, magicsOut); - count_occurence_magic(arr, i + 27, magicsOut); - count_occurence_magic(arr, i + 28, magicsOut); - count_occurence_magic(arr, i + 29, magicsOut); - count_occurence_magic(arr, i + 30, magicsOut); - count_occurence_magic(arr, i + 31, magicsOut); + OccurenceMagic(arr, i + 16, magicsOut); + OccurenceMagic(arr, i + 17, magicsOut); + OccurenceMagic(arr, i + 18, magicsOut); + OccurenceMagic(arr, i + 19, magicsOut); + OccurenceMagic(arr, i + 20, magicsOut); + OccurenceMagic(arr, i + 21, magicsOut); + OccurenceMagic(arr, i + 22, magicsOut); + OccurenceMagic(arr, i + 23, magicsOut); + OccurenceMagic(arr, i + 24, magicsOut); + OccurenceMagic(arr, i + 25, magicsOut); + OccurenceMagic(arr, i + 26, magicsOut); + OccurenceMagic(arr, i + 27, magicsOut); + OccurenceMagic(arr, i + 28, magicsOut); + OccurenceMagic(arr, i + 29, magicsOut); + OccurenceMagic(arr, i + 30, magicsOut); + OccurenceMagic(arr, i + 31, magicsOut); __builtin_prefetch(&arr[i + (3 * 16)]); - count_occurence_magic(arr, i + 32, magicsOut); - count_occurence_magic(arr, i + 33, magicsOut); - count_occurence_magic(arr, i + 34, magicsOut); - count_occurence_magic(arr, i + 35, magicsOut); - count_occurence_magic(arr, i + 36, magicsOut); - count_occurence_magic(arr, i + 37, magicsOut); - count_occurence_magic(arr, i + 38, magicsOut); - count_occurence_magic(arr, i + 39, magicsOut); - count_occurence_magic(arr, i + 40, magicsOut); - count_occurence_magic(arr, i + 41, magicsOut); - count_occurence_magic(arr, i + 42, magicsOut); - count_occurence_magic(arr, i + 43, magicsOut); - count_occurence_magic(arr, i + 44, magicsOut); - count_occurence_magic(arr, i + 45, magicsOut); - count_occurence_magic(arr, i + 46, magicsOut); - count_occurence_magic(arr, i + 47, magicsOut); + OccurenceMagic(arr, i + 32, magicsOut); + OccurenceMagic(arr, i + 33, magicsOut); + OccurenceMagic(arr, i + 34, magicsOut); + OccurenceMagic(arr, i + 35, magicsOut); + OccurenceMagic(arr, i + 36, magicsOut); + OccurenceMagic(arr, i + 37, magicsOut); + OccurenceMagic(arr, i + 38, magicsOut); + OccurenceMagic(arr, i + 39, magicsOut); + OccurenceMagic(arr, i + 40, magicsOut); + OccurenceMagic(arr, i + 41, magicsOut); + OccurenceMagic(arr, i + 42, magicsOut); + OccurenceMagic(arr, i + 43, magicsOut); + OccurenceMagic(arr, i + 44, magicsOut); + OccurenceMagic(arr, i + 45, magicsOut); + OccurenceMagic(arr, i + 46, magicsOut); + OccurenceMagic(arr, i + 47, magicsOut); // __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls - count_occurence_magic(arr, i + 48, magicsOut); - count_occurence_magic(arr, i + 49, magicsOut); - count_occurence_magic(arr, i + 50, magicsOut); - count_occurence_magic(arr, i + 51, magicsOut); - count_occurence_magic(arr, i + 52, magicsOut); - count_occurence_magic(arr, i + 53, magicsOut); - count_occurence_magic(arr, i + 54, magicsOut); - count_occurence_magic(arr, i + 55, magicsOut); - count_occurence_magic(arr, i + 56, magicsOut); - count_occurence_magic(arr, i + 57, magicsOut); - count_occurence_magic(arr, i + 58, magicsOut); - count_occurence_magic(arr, i + 59, magicsOut); - count_occurence_magic(arr, i + 60, magicsOut); - count_occurence_magic(arr, i + 61, magicsOut); - count_occurence_magic(arr, i + 62, magicsOut); - count_occurence_magic(arr, i + 63, magicsOut); + OccurenceMagic(arr, i + 48, magicsOut); + OccurenceMagic(arr, i + 49, magicsOut); + OccurenceMagic(arr, i + 50, magicsOut); + OccurenceMagic(arr, i + 51, magicsOut); + OccurenceMagic(arr, i + 52, magicsOut); + OccurenceMagic(arr, i + 53, magicsOut); + OccurenceMagic(arr, i + 54, magicsOut); + OccurenceMagic(arr, i + 55, magicsOut); + OccurenceMagic(arr, i + 56, magicsOut); + OccurenceMagic(arr, i + 57, magicsOut); + OccurenceMagic(arr, i + 58, magicsOut); + OccurenceMagic(arr, i + 59, magicsOut); + OccurenceMagic(arr, i + 60, magicsOut); + OccurenceMagic(arr, i + 61, magicsOut); + OccurenceMagic(arr, i + 62, magicsOut); + OccurenceMagic(arr, i + 63, magicsOut); } #pragma GCC unroll 4 for(; i < size; ++i) { - count_occurence_magic(arr, i, magicsOut); + OccurenceMagic(arr, i, magicsOut); } }