Likely better ILP (instruction-level parallelism), and no manually hard-coded digit counts in the code
This commit is contained in:
parent
22e80d4cd5
commit
151b8f398b
49
magyarsort.h
49
magyarsort.h
@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <cstring> // memset
|
||||||
|
|
||||||
namespace MagyarSort {
|
namespace MagyarSort {
|
||||||
|
|
||||||
@ -34,21 +35,19 @@ namespace MagyarSort {
|
|||||||
return shifted & (DIGIT_RANGE - 1);
|
return shifted & (DIGIT_RANGE - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Functor: no class should be generated I think (compiler should be smart) */
|
/** Recursive functor: presumably no real class object is generated for this (the compiler should inline it away) */
|
||||||
template<int DIGIT>
|
template<int DIGIT>
|
||||||
struct OccurenceMagic : public OccurenceMagic<DIGIT - 1> {
|
struct OccurenceMagic : public OccurenceMagic<DIGIT - 1> {
|
||||||
inline OccurenceMagic(uint32_t arr[], size_t i, size_t *radicsOut) noexcept
|
inline OccurenceMagic(uint32_t arr[], size_t i, size_t *radicsOut) noexcept
|
||||||
: OccurenceMagic<DIGIT -1 >(arr, i, radicsOut) {
|
: OccurenceMagic<DIGIT -1 >(arr, i, radicsOut) {
|
||||||
// parents run first so template recursion runs DIGIT=0 first...
|
// Base-class constructors run first, so the template recursion handles DIGIT=0 first...
|
||||||
++radicsOut[getDigit<DIGIT>(arr[i]) + DIGIT_RANGE * DIGIT];
|
++radicsOut[getDigit<DIGIT>(arr[i]) + DIGIT_RANGE * DIGIT];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
/** Ends template recursion */
|
/** Ends template recursion */
|
||||||
template<>
|
template<>
|
||||||
struct OccurenceMagic<-1> {
|
struct OccurenceMagic<-1> {
|
||||||
inline OccurenceMagic(uint32_t arr[], size_t i, size_t *radicsOut) noexcept {
|
inline OccurenceMagic(uint32_t arr[], size_t i, size_t *radicsOut) noexcept {}
|
||||||
/* empty */
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void countOccurences(uint32_t arr[], size_t size, size_t *radicsOut) noexcept {
|
static inline void countOccurences(uint32_t arr[], size_t size, size_t *radicsOut) noexcept {
|
||||||
@ -58,13 +57,29 @@ namespace MagyarSort {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int DIGIT_CHOICE>
|
/** Recursive functor: presumably no real class object is generated for this (the compiler should inline it away) */
|
||||||
static inline void prefixSum(size_t *radics) noexcept {
|
template<int DIGIT>
|
||||||
static constexpr int DSTART = DIGIT_CHOICE * DIGIT_RANGE;
|
struct PrefixMagic : public PrefixMagic<DIGIT - 1> {
|
||||||
size_t prev = 0;
|
inline PrefixMagic(size_t *radics, size_t *prev, int i) noexcept
|
||||||
for(int i = DSTART; i < (DSTART + DIGIT_RANGE); ++i) {
|
: PrefixMagic<DIGIT - 1>(radics, prev, i) {
|
||||||
radics[i] += prev;
|
static constexpr int DSTART = (DIGIT * DIGIT_RANGE);
|
||||||
prev = radics[i];
|
radics[DSTART + i] += prev[DIGIT];
|
||||||
|
prev[DIGIT] = radics[DSTART + i];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
/** Ends template recursion */
|
||||||
|
template<>
|
||||||
|
struct PrefixMagic<-1> {
|
||||||
|
inline PrefixMagic(size_t *radics, size_t *prev, int i) noexcept {}
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void calcPrefixSums(size_t *radics) noexcept {
|
||||||
|
static thread_local size_t prev[DIGITS];
|
||||||
|
memset(prev, 0, sizeof(prev));
|
||||||
|
|
||||||
|
for(int i = 0; i < DIGIT_RANGE; ++i) {
|
||||||
|
// This is a template-unrolled loop too
|
||||||
|
PrefixMagic<DIGITS - 1>(radics, prev, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,19 +88,13 @@ namespace MagyarSort {
|
|||||||
// Holds "digit" occurrences, prefix sums, and so on
|
// Holds "digit" occurrences, prefix sums, and so on
|
||||||
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
||||||
static thread_local size_t radics[DIGITS * DIGIT_RANGE];
|
static thread_local size_t radics[DIGITS * DIGIT_RANGE];
|
||||||
for(int i = 0; i < (DIGITS * DIGIT_RANGE); ++i) { radics[i] = 0; }
|
memset(radics, 0, sizeof(radics));
|
||||||
|
|
||||||
// Calculate occurrences of digits
|
// Calculate occurrences of digits
|
||||||
countOccurences(arr, size, radics);
|
countOccurences(arr, size, radics);
|
||||||
|
|
||||||
// Calculate prefix sums
|
// Calculate prefix sums
|
||||||
// TODO: Maybe should use better ILP here?
|
calcPrefixSums(radics);
|
||||||
// but maybe this is more cache friendly?
|
|
||||||
// TODO: manual digits!
|
|
||||||
prefixSum<0>(radics);
|
|
||||||
prefixSum<1>(radics);
|
|
||||||
prefixSum<2>(radics);
|
|
||||||
prefixSum<3>(radics);
|
|
||||||
|
|
||||||
/* // DEBUG:
|
/* // DEBUG:
|
||||||
*/
|
*/
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user