magyarsort/magyarsort.h

89 lines
2.7 KiB
C
Raw Normal View History

2021-03-11 21:22:37 +01:00
#ifndef MAGYAR_SORT_H
#define MAGYAR_SORT_H
/**
* Single-header library: a fast, heavily modified and optimized in-place radix sort.
*
* Only unsigned ints are supported for now, but it should be possible to adapt it to int and float...
* This is the counting variant with smart changes (not per-bit).
*
* LICENCE: CC3 - look it up, you need to mention me but that is all
*/
2021-03-11 21:38:06 +01:00
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>
namespace MagyarSort {
    // Only change these if you know what you are doing.
    // They are tunables so that nibble-sized digits can be tried
    // instead of byte-sized ones.
    //
    // Bytes or nibbles only:
    // - DIGIT_RANGE and BITS_PER_DIGIT should correspond
    // - DIGITS should also correspond with the uint32_t
    // - and DIGIT_RANGE should be a 2^n value (16 or 256)
    static constexpr int DIGITS = 4; // number of place values ("digits") in a key
    static constexpr int BITS_PER_DIGIT = 8; // bits per place value
    static constexpr int DIGIT_RANGE = 256; // number of states one place value can take

    // Compile-time checks for the correspondences described above, so a
    // mismatched edit fails loudly instead of silently mis-sorting.
    static_assert(DIGIT_RANGE == (1 << BITS_PER_DIGIT),
                  "DIGIT_RANGE must equal 2^BITS_PER_DIGIT");
    static_assert(DIGITS * BITS_PER_DIGIT == 32,
                  "DIGITS * BITS_PER_DIGIT must cover exactly the 32 bits of uint32_t");
    static_assert(DIGITS == 4,
                  "the digit passes below are unrolled by hand for exactly 4 digits");

    /**
     * Extract one digit of `num`.
     *
     * DIGIT_CHOICE selects the digit; 0 is the least significant one
     * (shift of 0 bits). The result is in the range [0, DIGIT_RANGE).
     */
    template<int DIGIT_CHOICE>
    static inline uint32_t getDigit(uint32_t num) noexcept {
        static constexpr int SHIFT = DIGIT_CHOICE * BITS_PER_DIGIT;
        // Mask works because DIGIT_RANGE is a power of two.
        return (num >> SHIFT) & (DIGIT_RANGE - 1);
    }

    /**
     * Count the digit occurrences of every element, for all digits in one sweep.
     *
     * @param arr       input keys (only read)
     * @param size      number of elements in `arr`
     * @param radicsOut DIGITS * DIGIT_RANGE counters; the counters of digit d
     *                  start at index d * DIGIT_RANGE. Counts are added to the
     *                  existing values, so the caller must zero them first.
     */
    static inline void calcOccurences(const uint32_t arr[], size_t size, size_t *radicsOut) noexcept {
        for(size_t i = 0; i < size; ++i) {
            const uint32_t value = arr[i];
            // TODO: manual digits!
            ++radicsOut[getDigit<0>(value) + DIGIT_RANGE * 0];
            ++radicsOut[getDigit<1>(value) + DIGIT_RANGE * 1];
            ++radicsOut[getDigit<2>(value) + DIGIT_RANGE * 2];
            ++radicsOut[getDigit<3>(value) + DIGIT_RANGE * 3];
        }
    }

    /**
     * Turn the counters of digit DIGIT_CHOICE into inclusive prefix sums.
     *
     * Afterwards the slot of digit value v holds the number of elements whose
     * digit is <= v, i.e. the one-past-the-end output index of bucket v.
     */
    template<int DIGIT_CHOICE>
    static inline void prefixSum(size_t *radics) noexcept {
        static constexpr int DSTART = DIGIT_CHOICE * DIGIT_RANGE;
        size_t running = 0;
        for(int i = DSTART; i < (DSTART + DIGIT_RANGE); ++i) {
            running += radics[i];
            radics[i] = running;
        }
    }

    /**
     * One stable distribution pass on digit DIGIT_CHOICE.
     *
     * Copies the `size` elements of `from` into `to`, ordered by the chosen
     * digit. `radics` must hold the inclusive prefix sums of that digit; the
     * slots of that digit are consumed (decremented) by the pass.
     */
    template<int DIGIT_CHOICE>
    static inline void scatterPass(const uint32_t *from, uint32_t *to, size_t size, size_t *radics) noexcept {
        size_t *bucketEnds = radics + DIGIT_CHOICE * DIGIT_RANGE;
        // Walk backwards and fill each bucket from its end: this keeps equal
        // digits in their original relative order, which LSD radix sort needs.
        for(size_t i = size; i > 0; --i) {
            const uint32_t value = from[i - 1];
            to[--bucketEnds[getDigit<DIGIT_CHOICE>(value)]] = value;
        }
    }

    /** Debug helper: print the DIGIT_RANGE table entries of every digit to stdout. */
    static inline void debugPrintRadics(const size_t *radics) {
        for(int digit = 0; digit < DIGITS; ++digit) {
            printf("d%d: ", digit);
            for(int value = 0; value < DIGIT_RANGE; ++value) {
                // %zu is the matching conversion for size_t
                printf("%zu,", radics[value + DIGIT_RANGE * digit]);
            }
            printf("\n\n");
        }
    }

    /**
     * Sort the given array in ascending order; the result ends up in `arr`.
     *
     * Uses a temporary buffer of `size` elements for the distribution passes.
     * Because the function is noexcept, failing to allocate that buffer
     * terminates the program.
     *
     * @param arr  the elements to sort (may be null only when size is 0)
     * @param size number of elements in `arr`
     */
    inline void sort(uint32_t arr[], size_t size) noexcept {
        // Zero or one element: already sorted, nothing to allocate.
        if(size < 2) { return; }

        // Holds digit occurrences, then prefix sums.
        // The first DIGIT_RANGE elements belong to the least significant
        // digit (getDigit<0>), the last DIGIT_RANGE to the most significant.
        static thread_local size_t radics[DIGITS * DIGIT_RANGE];
        for(int i = 0; i < (DIGITS * DIGIT_RANGE); ++i) { radics[i] = 0; }

        // Calculate occurrences of digits
        calcOccurences(arr, size, radics);

        // Calculate prefix sums
        // TODO: Maybe should use better ILP here?
        //       but maybe this is more cache friendly?
        // TODO: manual digits!
        prefixSum<0>(radics);
        prefixSum<1>(radics);
        prefixSum<2>(radics);
        prefixSum<3>(radics);

#ifdef MAGYAR_SORT_DEBUG
        debugPrintRadics(radics);
#endif

        // Distribute from the least to the most significant digit, bouncing
        // between the array and the scratch buffer. With an even number of
        // passes the sorted data lands back in `arr`.
        std::vector<uint32_t> scratch(size);
        uint32_t *buffer = scratch.data();
        scatterPass<0>(arr, buffer, size, radics);
        scatterPass<1>(buffer, arr, size, radics);
        scatterPass<2>(arr, buffer, size, radics);
        scatterPass<3>(buffer, arr, size, radics);
    }
} // namespace MagyarSort
#endif