simpler occurrence template
This commit is contained in:
parent
b5aeaa1bdb
commit
d487bb111b
139
magyarsort.h
139
magyarsort.h
@ -101,6 +101,15 @@ namespace MagyarSort {
|
|||||||
inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {}
|
inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) noexcept {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<int DIGIT, typename COUNTER_TYP>
|
||||||
|
static inline void count_occurence_magic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *magicsOut) {
|
||||||
|
if constexpr (DIGIT >= 0) {
|
||||||
|
// Parents run first so template recursion runs DIGIT=0 first...
|
||||||
|
count_occurence_magic<DIGIT - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||||
|
++magicsOut[getDigit<DIGIT>(arr[i]) + DIGIT_RANGE * DIGIT];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** ARR_END must be an (STEP * k) */
|
/** ARR_END must be an (STEP * k) */
|
||||||
template<int ARR_END, int STEP, typename ARR_T, int R_OR_W = 0 /* 0:R, 1:W */, int LOCALITY = 3 /* 3 is best, 0 worst*/>
|
template<int ARR_END, int STEP, typename ARR_T, int R_OR_W = 0 /* 0:R, 1:W */, int LOCALITY = 3 /* 3 is best, 0 worst*/>
|
||||||
struct PrefetchMagic : public PrefetchMagic<(ARR_END - STEP), STEP, ARR_T, R_OR_W, LOCALITY> {
|
struct PrefetchMagic : public PrefetchMagic<(ARR_END - STEP), STEP, ARR_T, R_OR_W, LOCALITY> {
|
||||||
@ -124,79 +133,79 @@ namespace MagyarSort {
|
|||||||
//__builtin_prefetch(&arr[i + (1 * 16)], 0, 2); // r, L2 or L3 cache
|
//__builtin_prefetch(&arr[i + (1 * 16)], 0, 2); // r, L2 or L3 cache
|
||||||
__builtin_prefetch(&arr[i + (1 * 16)]);
|
__builtin_prefetch(&arr[i + (1 * 16)]);
|
||||||
// Creates no object, struct is empty
|
// Creates no object, struct is empty
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 2, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 2, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 3, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 3, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 4, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 4, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 5, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 5, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 6, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 6, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 7, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 7, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 8, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 8, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 9, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 9, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 10, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 10, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 11, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 11, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 12, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 12, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, magicsOut);
|
||||||
// Prefetch for read level-1 cache
|
// Prefetch for read level-1 cache
|
||||||
__builtin_prefetch(&arr[i + (2 * 16)]);
|
__builtin_prefetch(&arr[i + (2 * 16)]);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 19, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 19, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 20, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 20, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 21, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 21, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 22, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 22, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 23, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 23, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 24, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 24, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 25, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 25, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 26, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 26, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 27, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 27, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 28, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 28, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, magicsOut);
|
||||||
__builtin_prefetch(&arr[i + (3 * 16)]);
|
__builtin_prefetch(&arr[i + (3 * 16)]);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 35, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 35, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 36, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 36, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 37, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 37, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 38, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 38, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 39, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 39, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 40, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 40, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 41, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 41, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 42, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 42, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 43, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 43, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 44, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 44, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, magicsOut);
|
||||||
// __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls
|
// __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 51, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 51, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 52, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 52, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 53, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 53, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 54, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 54, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 55, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 55, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 56, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 56, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 57, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 57, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 58, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 58, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 59, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 59, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 60, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 60, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 61, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 61, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 62, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 62, magicsOut);
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 63, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i + 63, magicsOut);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma GCC unroll 4
|
#pragma GCC unroll 4
|
||||||
for(; i < size; ++i) {
|
for(; i < size; ++i) {
|
||||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
count_occurence_magic<DIGITS - 1, COUNTER_TYP>(arr, i, magicsOut);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user