simpler occurrence template

This commit is contained in:
Richard Thier 2025-10-02 02:28:46 +02:00
parent b5aeaa1bdb
commit d487bb111b

View File

@ -101,6 +101,15 @@ namespace MagyarSort {
inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {} inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {}
}; };
/**
 * Counts every digit of arr[i], from digit 0 up to and including DIGIT,
 * into the occurrence counters stored in magicsOut.
 *
 * For each digit index d in [0, DIGIT] this increments
 * magicsOut[getDigit<d>(arr[i]) + DIGIT_RANGE * d], so the counters of
 * digit d start at offset DIGIT_RANGE * d inside magicsOut.
 * Instantiating with a negative DIGIT is a no-op and ends the recursion.
 *
 * Force-inlined and noexcept, matching the OccurenceMagic constructors
 * declared above, so the unrolled counting loops keep getting flat code.
 *
 * @tparam DIGIT        Highest digit index to count (callers pass DIGITS - 1).
 * @tparam COUNTER_TYP  Integer type used both for the index and the counters.
 * @param arr           Input array being counted.
 * @param i             Index of the element of arr to process.
 * @param magicsOut     Counter storage that receives the increments.
 */
template<int DIGIT, typename COUNTER_TYP>
static inline __attribute__((always_inline)) void count_occurence_magic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {
	if constexpr (DIGIT >= 0) {
		// Recurse before counting, so the lower digits (starting at DIGIT=0) are handled first
		count_occurence_magic<DIGIT - 1, COUNTER_TYP>(arr, i, magicsOut);
		// Bump the counter of this element's DIGIT-th digit inside that digit's own counter range
		++magicsOut[getDigit<DIGIT>(arr[i]) + DIGIT_RANGE * DIGIT];
	}
}
/** ARR_END must be an (STEP * k) */ /** ARR_END must be an (STEP * k) */
template<int ARR_END, int STEP, typename ARR_T, int R_OR_W = 0 /* 0:R, 1:W */, int LOCALITY = 3 /* 3 is best, 0 worst*/> template<int ARR_END, int STEP, typename ARR_T, int R_OR_W = 0 /* 0:R, 1:W */, int LOCALITY = 3 /* 3 is best, 0 worst*/>
struct PrefetchMagic : public PrefetchMagic<(ARR_END - STEP), STEP, ARR_T, R_OR_W, LOCALITY> { struct PrefetchMagic : public PrefetchMagic<(ARR_END - STEP), STEP, ARR_T, R_OR_W, LOCALITY> {
@ -124,79 +133,79 @@ namespace MagyarSort {
//__builtin_prefetch(&arr[i + (1 * 16)], 0, 2); // r, L2 or L3 cache //__builtin_prefetch(&arr[i + (1 * 16)], 0, 2); // r, L2 or L3 cache
__builtin_prefetch(&arr[i + (1 * 16)]); __builtin_prefetch(&arr[i + (1 * 16)]);
// Creates no object, struct is empty // Creates no object, struct is empty
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 2, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 2, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 3, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 3, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 4, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 4, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 5, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 5, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 6, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 6, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 7, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 7, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 8, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 8, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 9, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 9, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 10, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 10, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 11, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 11, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 12, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 12, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, magicsOut);
// Prefetch for read level-1 cache // Prefetch for read level-1 cache
__builtin_prefetch(&arr[i + (2 * 16)]); __builtin_prefetch(&arr[i + (2 * 16)]);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 19, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 19, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 20, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 20, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 21, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 21, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 22, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 22, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 23, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 23, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 24, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 24, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 25, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 25, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 26, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 26, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 27, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 27, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 28, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 28, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, magicsOut);
__builtin_prefetch(&arr[i + (3 * 16)]); __builtin_prefetch(&arr[i + (3 * 16)]);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 35, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 35, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 36, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 36, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 37, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 37, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 38, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 38, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 39, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 39, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 40, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 40, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 41, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 41, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 42, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 42, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 43, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 43, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 44, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 44, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, magicsOut);
// __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls // __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 51, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 51, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 52, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 52, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 53, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 53, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 54, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 54, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 55, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 55, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 56, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 56, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 57, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 57, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 58, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 58, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 59, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 59, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 60, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 60, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 61, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 61, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 62, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 62, magicsOut);
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 63, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 63, magicsOut);
} }
#pragma GCC unroll 4 #pragma GCC unroll 4
for(; i < size; ++i) { for(; i < size; ++i) {
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut); count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
} }
} }