more manual unrolls

This commit is contained in:
Richard Thier 2021-12-17 23:37:48 +01:00
parent 645bc19f19
commit e5d4ff74ad

View File

@ -193,15 +193,30 @@ namespace MagyarSort {
return radics[DSTART + i];
}
/** Helper for calcPrefixSums */
template<int DIGIT, typename COUNTER_TYP>
struct PMagic2 : public PMagic2<DIGIT - 1, COUNTER_TYP> {
inline __attribute__((always_inline)) PMagic2(COUNTER_TYP *radics, COUNTER_TYP *prev)
: PMagic2<DIGIT - 1, COUNTER_TYP>(radics, prev) {
// Again first the 0th digit because of parent constructors!
// This is a template-unrolled loop too
PrefixMagic<DIGITS - 1, COUNTER_TYP>(radics, prev, DIGIT);
}
};
/**
 * End of the PMagic2 template recursion: the instantiation for index -1
 * has no base class and its constructor does nothing.
 */
template<typename COUNTER_TYP>
struct PMagic2<-1, COUNTER_TYP> {
	// Same constructor signature as the recursive case; both arguments are ignored.
	inline __attribute__((always_inline)) PMagic2(COUNTER_TYP *, COUNTER_TYP *) {}
};
/**
 * Runs the PrefixMagic step over `radics` once for every index
 * 0..DIGIT_RANGE-1, in ascending order, using a zeroed scratch array.
 *
 * The iteration is template-unrolled through PMagic2 rather than written as a
 * runtime loop; the step must run exactly once per index, because each run
 * receives (and may update) the shared `prev` scratch array.
 *
 * @param radics Counter array that PrefixMagic works on
 *               (presumably occurrence counts turned into prefix sums in
 *               place - confirm against PrefixMagic).
 */
template<typename COUNTER_TYP>
static inline void calcPrefixSums(COUNTER_TYP *radics) noexcept {
	// Scratch with one entry per digit, reset to zero on every call
	static thread_local COUNTER_TYP prev[DIGITS];
	memset(prev, 0, sizeof(prev));
	// This is a template-unrolled loop too: equivalent to calling
	// PrefixMagic<DIGITS - 1, COUNTER_TYP>(radics, prev, i) for i = 0..DIGIT_RANGE-1
	PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
}
/** Recursive Functor: no class should be generated I think (compiler should be smart) */