Revert "simpler occurence template"
This reverts commit d487bb111b93f4ab186147fd876373f46eff0e59.
This commit is contained in:
parent
d487bb111b
commit
9b9997cbdb
139
magyarsort.h
139
magyarsort.h
@ -101,15 +101,6 @@ namespace MagyarSort {
|
||||
inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {}
|
||||
};
|
||||
|
||||
template<int DIGIT, typename COUNTER_TYP>
|
||||
static inline void count_occurence_magic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) {
|
||||
if constexpr (DIGIT >= 0) {
|
||||
// Parents run first so template recursion runs DIGIT=0 first...
|
||||
count_occurence_magic<DIGIT - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||
++magicsOut[getDigit<DIGIT>(arr[i]) + DIGIT_RANGE * DIGIT];
|
||||
}
|
||||
}
|
||||
|
||||
/** ARR_END must be a multiple of STEP (i.e. STEP * k) */
|
||||
template<int ARR_END, int STEP, typename ARR_T, int R_OR_W = 0 /* 0:R, 1:W */, int LOCALITY = 3 /* 3 is best, 0 worst*/>
|
||||
struct PrefetchMagic : public PrefetchMagic<(ARR_END - STEP), STEP, ARR_T, R_OR_W, LOCALITY> {
|
||||
@ -133,79 +124,79 @@ namespace MagyarSort {
|
||||
//__builtin_prefetch(&arr[i + (1 * 16)], 0, 2); // r, L2 or L3 cache
|
||||
__builtin_prefetch(&arr[i + (1 * 16)]);
|
||||
// Creates no object, struct is empty
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 2, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 3, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 4, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 5, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 6, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 7, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 8, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 9, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 10, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 11, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 12, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 2, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 3, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 4, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 5, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 6, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 7, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 8, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 9, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 10, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 11, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 12, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, magicsOut);
|
||||
// Prefetch for read level-1 cache
|
||||
__builtin_prefetch(&arr[i + (2 * 16)]);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 19, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 20, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 21, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 22, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 23, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 24, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 25, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 26, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 27, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 28, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 19, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 20, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 21, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 22, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 23, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 24, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 25, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 26, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 27, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 28, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, magicsOut);
|
||||
__builtin_prefetch(&arr[i + (3 * 16)]);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 35, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 36, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 37, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 38, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 39, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 40, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 41, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 42, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 43, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 44, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 35, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 36, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 37, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 38, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 39, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 40, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 41, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 42, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 43, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 44, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, magicsOut);
|
||||
// __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 51, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 52, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 53, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 54, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 55, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 56, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 57, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 58, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 59, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 60, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 61, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 62, magicsOut);
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 63, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 51, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 52, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 53, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 54, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 55, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 56, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 57, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 58, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 59, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 60, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 61, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 62, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 63, magicsOut);
|
||||
}
|
||||
|
||||
#pragma GCC unroll 4
|
||||
for(; i < size; ++i) {
|
||||
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user