#ifndef MAGYAR_SORT_H
#define MAGYAR_SORT_H
/**
 * Single-header library: in-place, fast, heavily modified and optimized radix sort.
 *
 * Only unsigned 32-bit integers for now, but it should be possible to adapt it
 * to signed integers and floats.
 * This is the counting variant with smart changes (not per-bit).
 *
 * STATUS: the code in this header currently builds the per-digit histograms
 * and their prefix sums and prints them; the pass that actually moves the
 * elements is not present here.
 *
 * LICENCE: CC3 - look it up, you need to mention me but that is all
 */

#include <cstddef>
#include <cstdint>
#include <cstdio>

namespace MagyarSort {

	// Only change these if you know what you are doing.
	// They exist so that nibble-sized digits can be tried against byte-sized ones.
	//
	// Bytes or nibbles only:
	// - DIGIT_RANGE and BITS_PER_DIGIT must correspond (DIGIT_RANGE == 2^BITS_PER_DIGIT)
	// - DIGITS must correspond to the width of uint32_t
	// - DIGIT_RANGE must be a power of two (16 or 256)
	static constexpr int DIGITS = 4;         // number of digits ("place values") in a key
	static constexpr int BITS_PER_DIGIT = 8; // bits per digit
	static constexpr int DIGIT_RANGE = 256;  // number of distinct values a digit can take

	// Enforce the constraints described above at compile time.
	static_assert(DIGIT_RANGE == (1 << BITS_PER_DIGIT),
			"DIGIT_RANGE must be 2^BITS_PER_DIGIT");
	static_assert(DIGITS * BITS_PER_DIGIT == 32,
			"DIGITS * BITS_PER_DIGIT must cover exactly the 32 bits of uint32_t");

	/**
	 * Extracts one digit of the key.
	 *
	 * @tparam DIGIT_CHOICE Which digit to extract; 0 is the least significant one.
	 * @param num The key.
	 * @return The chosen digit, in the range [0, DIGIT_RANGE).
	 */
	template<int DIGIT_CHOICE>
	static inline uint32_t getDigit(uint32_t num) noexcept {
		static constexpr int SHIFT = DIGIT_CHOICE * BITS_PER_DIGIT;

		uint32_t shifted = num >> SHIFT;
		// DIGIT_RANGE is a power of two, so (DIGIT_RANGE - 1) is the digit mask
		return shifted & (DIGIT_RANGE - 1);
	}

	/**
	 * Functor-like helper: constructing OccurenceMagic<DIGIT> counts digits
	 * 0..DIGIT of arr[i] into radicsOut. The struct has no data members, so the
	 * temporary is expected to be optimized away by the compiler.
	 *
	 * Digit d of the element is counted at radicsOut[digitValue + DIGIT_RANGE * d].
	 */
	template<int DIGIT>
	struct OccurenceMagic : public OccurenceMagic<DIGIT - 1> {
		inline OccurenceMagic(uint32_t arr[], size_t i, size_t *radicsOut) noexcept
			: OccurenceMagic<DIGIT - 1>(arr, i, radicsOut) {
			// Base-class constructors run first, so the recursion handles DIGIT=0 first...
			++radicsOut[getDigit<DIGIT>(arr[i]) + DIGIT_RANGE * DIGIT];
		}
	};

	/** Ends the template recursion */
	template<>
	struct OccurenceMagic<-1> {
		inline OccurenceMagic(uint32_t arr[], size_t i, size_t *radicsOut) noexcept {
			/* empty */
		}
	};

	/**
	 * Counts, for every digit position, how many elements have each digit value.
	 *
	 * @param arr The keys to count.
	 * @param size Number of elements in arr.
	 * @param radicsOut Counters, DIGITS * DIGIT_RANGE elements. They are only
	 *                  incremented here, so the caller must zero them beforehand.
	 */
	static inline void calcOccurences(uint32_t arr[], size_t size, size_t *radicsOut) {
		for(size_t i = 0; i < size; ++i) {
			// One temporary per element; its constructor chain does the counting
			OccurenceMagic<DIGITS - 1>(arr, i, radicsOut);
		}
	}

	/**
	 * Turns the counters of one digit position into inclusive prefix sums, in place.
	 *
	 * @tparam DIGIT_CHOICE Which digit's DIGIT_RANGE-sized counter block to process.
	 * @param radics The counter array (DIGITS * DIGIT_RANGE elements).
	 */
	template<int DIGIT_CHOICE>
	static inline void prefixSum(size_t *radics) {
		static constexpr int DSTART = DIGIT_CHOICE * DIGIT_RANGE;

		size_t prev = 0;
		for(int i = DSTART; i < (DSTART + DIGIT_RANGE); ++i) {
			radics[i] += prev;
			prev = radics[i];
		}
	}

	/**
	 * Sort the given array (in-place sorting) with the given size.
	 *
	 * NOTE: as written, this only computes the digit histograms and their
	 * prefix sums and prints them to stdout for debugging; arr itself is not
	 * modified.
	 *
	 * @param arr The keys.
	 * @param size Number of elements in arr.
	 */
	inline void sort(uint32_t arr[], size_t size) noexcept {
		// The prefix sums below are written out by hand for four digits
		static_assert(DIGITS == 4,
				"sort() hard-codes prefixSum<0..3>; update it when changing DIGITS");

		static constexpr size_t RADICS_SIZE = static_cast<size_t>(DIGITS) * DIGIT_RANGE;

		// Holds "digit" occurrences, prefix sums, whatever is needed.
		// The first DIGIT_RANGE elements belong to digit 0 (the least significant
		// one), the last DIGIT_RANGE elements to the most significant digit.
		static thread_local size_t radics[RADICS_SIZE];
		for(size_t i = 0; i < RADICS_SIZE; ++i) {
			radics[i] = 0;
		}

		// Calculate occurrences of digits
		calcOccurences(arr, size, radics);

		// Calculate prefix sums
		// TODO: Maybe should use better ILP here?
		//       but maybe this is more cache friendly?
		// TODO: manual digits!
		prefixSum<0>(radics);
		prefixSum<1>(radics);
		prefixSum<2>(radics);
		prefixSum<3>(radics);

		// DEBUG: dump the prefix sums of every digit.
		// size_t must be printed with %zu; %d is undefined behaviour for it.
		static constexpr size_t DIGIT_COUNT = static_cast<size_t>(DIGITS);
		static constexpr size_t RANGE = static_cast<size_t>(DIGIT_RANGE);
		for(size_t j = 0; j < DIGIT_COUNT; ++j) {
			printf("d%zu: ", j);
			for(size_t i = 0; i < RANGE; ++i) {
				printf("%zu,", radics[i + RANGE * j]);
			}
			printf("\n\n");
		}
	}

} // namespace MagyarSort

#endif