diff --git a/magyarsort.h b/magyarsort.h index 06e3a03..898f9fb 100644 --- a/magyarsort.h +++ b/magyarsort.h @@ -9,7 +9,6 @@ * LICENCE: CC3 - look it up, you need to mention me but that is all */ -#include #include #include @@ -27,14 +26,45 @@ namespace MagyarSort { static constexpr int BITS_PER_DIGIT = 8; // "bit / helyiérték" static constexpr int DIGIT_RANGE = 256; // "helyiérték állapottér" - template + template static inline uint32_t getDigit(uint32_t num) noexcept { - static constexpr int SHIFT = digitChoice * BITS_PER_DIGIT; + static constexpr int SHIFT = DIGIT_CHOICE * BITS_PER_DIGIT; uint32_t shifted = num >> SHIFT; return shifted & (DIGIT_RANGE - 1); } + static inline void calcOccurences(uint32_t arr[], size_t size, size_t *radicsOut) { + for(size_t i = 0; i < size; ++i) { + auto d0 = getDigit<0>(arr[i]); + auto d1 = getDigit<1>(arr[i]); + auto d2 = getDigit<2>(arr[i]); + auto d3 = getDigit<3>(arr[i]); + + /* // DEBUG: + printf("d0:%u, arr[i]: %u\n", d0, arr[i]); + printf("d1:%u, arr[i]: %u\n", d1, arr[i]); + printf("d2:%u, arr[i]: %u\n", d2, arr[i]); + printf("d3:%u, arr[i]: %u\n", d3, arr[i]); + */ + + ++radicsOut[d0]; + ++radicsOut[d1 + DIGIT_RANGE * 1]; + ++radicsOut[d2 + DIGIT_RANGE * 2]; + ++radicsOut[d3 + DIGIT_RANGE * 3]; + } + } + + template + static inline void prefixSum(size_t *radics) { + static constexpr int DSTART = DIGIT_CHOICE * DIGIT_RANGE; + size_t prev = 0; + for(int i = DSTART; i < (DSTART + DIGIT_RANGE); ++i) { + radics[i] += prev; + prev = radics[i]; + } + } + /** Sort the given array (in-place sorting) with the given size */ inline void sort(uint32_t arr[], size_t size) noexcept { // Holds "digit" occurences, prefix sums, whatevers @@ -43,24 +73,17 @@ namespace MagyarSort { for(int i = 0; i < (DIGITS * DIGIT_RANGE); ++i) { radics[i] = 0; } // Calculate occurences of digits - for(size_t i = 0; i < size; ++i) { - auto d0 = getDigit<0>(arr[i]); - auto d1 = getDigit<1>(arr[i]); - auto d2 = getDigit<2>(arr[i]); - auto d3 = getDigit<3>(arr[i]); + calcOccurences(arr, size, radics); - printf("d0:%u, arr[i]: %u\n", d0, arr[i]); - printf("d1:%u, arr[i]: %u\n", d1, arr[i]); - printf("d2:%u, arr[i]: %u\n", d2, arr[i]); - printf("d3:%u, arr[i]: %u\n", d3, arr[i]); + // Calculate prefix sums + // TODO: Maybe should use better ILP here? + // but maybe this is more cache friendly? + prefixSum<0>(radics); + prefixSum<1>(radics); + prefixSum<2>(radics); + prefixSum<3>(radics); - ++radics[d0]; - ++radics[d1 + DIGIT_RANGE * 1]; - ++radics[d2 + DIGIT_RANGE * 2]; - ++radics[d3 + DIGIT_RANGE * 3]; - } - - // TODO: remove debug stuffz + /* // DEBUG: for(size_t j = 0; j < DIGITS; ++j) { printf("d%d: ", j); for(size_t i = 0; i < DIGIT_RANGE; ++i) { @@ -68,6 +91,7 @@ namespace MagyarSort { } printf("\n\n"); } + */ } };