basic prefetch optimizations
This commit is contained in:
parent
e5d4ff74ad
commit
e7b677e4db
29
magyarsort.h
29
magyarsort.h
@ -88,16 +88,30 @@ namespace MagyarSort {
|
||||
/**
 * Terminating specialization (index -1) that ends the OccurenceMagic template recursion.
 * The constructor takes the same arguments as the other levels but has an empty body,
 * so constructing this specialization does no work.
 * NOTE(review): the two constructor lines below differ only by the always_inline
 * attribute and look like the removed/added pair of this diff hunk - confirm that only
 * the always_inline variant exists in the real header.
 */
|
||||
template<typename COUNTER_TYP>
|
||||
struct OccurenceMagic<-1, COUNTER_TYP> {
|
||||
inline OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *radicsOut) noexcept {}
|
||||
inline __attribute__((always_inline)) OccurenceMagic(uint32_t arr[], COUNTER_TYP i, COUNTER_TYP *radicsOut) noexcept {}
|
||||
};
|
||||
|
||||
/**
 * Compile-time unrolled prefetcher.
 *
 * Constructing a PrefetchMagic issues one __builtin_prefetch for each of
 * arr[0], arr[STEP], ..., arr[ARR_END - STEP]. The unrolling is done through the
 * base-class chain: every level first constructs its base (which covers the lower
 * indices) and then prefetches the element at (ARR_END - STEP). The ARR_END == 0
 * specialization below ends the chain.
 *
 * Template parameters:
 *  - ARR_END:  one-past-the-last element index to cover; must be a positive multiple of STEP
 *  - STEP:     distance (in elements) between two prefetched addresses; must be positive
 *  - ARR_T:    element type of the array
 *  - R_OR_W:   0 prefetches for reading, 1 for writing (second __builtin_prefetch argument)
 *  - LOCALITY: temporal locality hint 0..3, 3 being the strongest (third __builtin_prefetch argument)
 *
 * The struct has no data members; it exists only for the side effect of its constructor.
 */
template<int ARR_END, int STEP, typename ARR_T, int R_OR_W = 0 /* 0:R, 1:W */, int LOCALITY = 3 /* 3 is best, 0 worst*/>
struct PrefetchMagic
    : public PrefetchMagic<((STEP > 0 && ARR_END > STEP) ? (ARR_END - STEP) : 0), STEP, ARR_T, R_OR_W, LOCALITY> {
    // The base index is clamped to 0 so that a bad ARR_END / STEP pair still reaches the
    // terminating specialization; the asserts below then report the real problem instead
    // of the compiler running into its template recursion limit.
    static_assert(STEP > 0, "PrefetchMagic: STEP must be positive");
    static_assert(ARR_END > 0 && (STEP <= 0 || ARR_END % STEP == 0),
                  "PrefetchMagic: ARR_END must be a positive multiple of STEP");

    /** Next lower level of the recursion (identical to ARR_END - STEP for valid parameters). */
    using Lower = PrefetchMagic<((STEP > 0 && ARR_END > STEP) ? (ARR_END - STEP) : 0), STEP, ARR_T, R_OR_W, LOCALITY>;

    /** Prefetches every STEP-th element of arr below ARR_END; arr itself is never dereferenced. */
    inline __attribute__((always_inline)) PrefetchMagic(ARR_T *arr) noexcept
        : Lower(arr) {
        __builtin_prefetch(&arr[ARR_END - STEP], R_OR_W, LOCALITY);
    }
};

/** Ends the PrefetchMagic recursion: with ARR_END == 0 there is nothing left to prefetch. */
template<int STEP, typename ARR_T, int R_OR_W, int LOCALITY>
struct PrefetchMagic<0, STEP, ARR_T, R_OR_W, LOCALITY> {
    inline __attribute__((always_inline)) PrefetchMagic(ARR_T *arr) noexcept {}
};
|
||||
|
||||
template<typename COUNTER_TYP>
|
||||
static inline void countOccurences(uint32_t arr[], COUNTER_TYP size, COUNTER_TYP *radicsOut) noexcept {
|
||||
// #pragma GCC unroll 64
|
||||
COUNTER_TYP i = 0;
|
||||
// #pragma GCC unroll 4
|
||||
for(; i < size - 64; i += 64) {
|
||||
// Prefetch caches
|
||||
//__builtin_prefetch(&arr[i + 64]);
|
||||
// Prefetch for read level-1 cache
|
||||
__builtin_prefetch(&arr[i + (1 * 16)]);
|
||||
// Creates no object, struct is empty
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 1, radicsOut);
|
||||
@ -115,6 +129,8 @@ namespace MagyarSort {
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 13, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 14, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 15, radicsOut);
|
||||
// Prefetch for read level-1 cache
|
||||
__builtin_prefetch(&arr[i + (2 * 16)]);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 16, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 17, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 18, radicsOut);
|
||||
@ -131,6 +147,7 @@ namespace MagyarSort {
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 29, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 30, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 31, radicsOut);
|
||||
__builtin_prefetch(&arr[i + (3 * 16)]);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 32, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 33, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 34, radicsOut);
|
||||
@ -147,6 +164,7 @@ namespace MagyarSort {
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 45, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 46, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 47, radicsOut);
|
||||
// __builtin_prefetch(&arr[i + (4 * 16)]); // Only needed for longer than 64 unrolls
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 48, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 49, radicsOut);
|
||||
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i + 50, radicsOut);
|
||||
@ -296,6 +314,9 @@ namespace MagyarSort {
|
||||
// Holds the per-"digit" occurrence counts, later their prefix sums, and other per-digit scratch values
|
||||
// The first DIGIT_RANGE elements belong to the most significant digit, the last DIGIT_RANGE elements to the least significant one
|
||||
static thread_local COUNTER_TYP radics[DIGITS * DIGIT_RANGE];
|
||||
// Prefetch the counter array for writing
|
||||
//__builtin_prefetch(&radicsOut[..], 1);
|
||||
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
|
||||
memset(radics, 0, sizeof(radics));
|
||||
|
||||
// Calculate occurrences of digits
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user