diff --git a/magyarsort.h b/magyarsort.h
index 7b6bd2c..9227a94 100644
--- a/magyarsort.h
+++ b/magyarsort.h
@@ -13,7 +13,12 @@
 #include <cstdio>
 #include <cstring> // memset
 
+// TODO: Only for the regular radix I guess
+#include <vector>
+#include <utility> // std::swap
+
 namespace MagyarSort {
+
     /* CONFIG */
     // Only change these if you know what you are doing
     // I use these because I want to see if nibbles are
@@ -23,9 +28,40 @@ namespace MagyarSort {
     // - DIGIT_RANGE and BITS_PER_DIGIT should correspond
     // - DIGITS should also correspond with the uint32_t
     // - and DIGIT_RANGE should be 2^n value (16 or 256)
+#ifdef MAGYAR_SORT_NIBBLE
+    // Per-nibble digits sorting
     static constexpr int DIGITS = 8; // "helyiérték"
     static constexpr int BITS_PER_DIGIT = 4; // "bit / helyiérték"
     static constexpr int DIGIT_RANGE = 16; // "helyiérték állapottér"
+#else
+    // Per-byte digits sorting
+    static constexpr int DIGITS = 4; // "helyiérték" (number of digit positions)
+    static constexpr int BITS_PER_DIGIT = 8; // "bit / helyiérték" (bits per digit position)
+    static constexpr int DIGIT_RANGE = 256; // "helyiérték állapottér" (number of values one digit can take)
+#endif
+
+    /* DEBUG */
+
+    /** Prints the first `size` elements of `arr` in hex on one line (debug aid) */
+    inline void debugArr(uint32_t *arr, size_t size) {
+        for(size_t i = 0; i < size; ++i) {
+            printf("%x, ", arr[i]);
+        }
+        printf("\n");
+    }
+
+    /** Prints the DIGITS * DIGIT_RANGE counters of `radics`, one row per digit position (debug aid) */
+    inline void debugRadics(size_t *radics) {
+        for(size_t j = 0; j < DIGITS; ++j) {
+            printf("d%zu: ", j);
+            for(size_t i = 0; i < DIGIT_RANGE; ++i) {
+                printf("%zu,", radics[i + DIGIT_RANGE*j]);
+            }
+            printf("\n\n");
+        }
+    }
+
+    /* HELPERS */
 
     template<int DIGIT>
     static inline uint32_t getDigit(uint32_t num) noexcept {
@@ -73,6 +109,13 @@ namespace MagyarSort {
         inline PrefixMagic(size_t *radics, size_t *prev, int i) noexcept {}
     };
 
+    /** Gets a REFERENCE to slot `i` of digit position DIGIT inside the flat, multi-digit radix array */
+    template<int DIGIT>
+    static inline size_t &rGet(size_t *radics, size_t i) noexcept {
+        static constexpr int DSTART = (DIGIT * DIGIT_RANGE);
+        return radics[DSTART + i];
+    }
+
     static inline void calcPrefixSums(size_t *radics) noexcept {
         static thread_local size_t prev[DIGITS];
         memset(prev, 0, sizeof(prev));
@@ -83,15 +126,40 @@ namespace MagyarSort {
         }
     }
 
-    void debugIt(size_t *radics) {
-        for(size_t j = 0; j < DIGITS; ++j) {
-            printf("d%d: ", j);
-            for(size_t i = 0; i < DIGIT_RANGE; ++i) {
-                printf("%d,", radics[i + DIGIT_RANGE*j]);
+    /** Recursive functor: the base-class constructor runs first, so passes execute for digit 0, 1, ..., DIGIT in that order */
+    template<int DIGIT>
+    struct RadixMagic : public RadixMagic<DIGIT - 1> {
+        inline RadixMagic(size_t *radics, uint32_t *&from, uint32_t *&to, size_t size) noexcept // BEWARE: "*&" is needed so the pointer swap is visible to the caller
+            : RadixMagic<DIGIT - 1>(radics, from, to, size) {
+            // DEBUG
+            //printf("%d before: ", DIGIT);
+            //debugArr(from, size);
+
+            for(size_t i = size; i > 0; --i) { // right-to-left so the order produced by the earlier digit passes is kept
+                // Get num and its new offset / location
+                auto num = from[i - 1];
+                auto digVal = getDigit<DIGIT>(num);
+                auto offset = (--rGet<DIGIT>(radics, digVal));
+
+                // Add to the proper target location
+                to[offset] = num;
             }
-            printf("\n\n");
+
+            // DEBUG
+            //printf("%d after: ", DIGIT);
+            //debugArr(to, size);
+
+            // Only swaps pointers :-)
+            std::swap(from, to);
         }
-    }
+    };
+    /** Ends the template recursion */
+    template<>
+    struct RadixMagic<-1> {
+        inline RadixMagic(size_t *radics, uint32_t *&from, uint32_t *&to, size_t size) noexcept { }
+    };
+
+    /* SORT */
 
     /** Sort the given array (in-place sorting) with the given size */
     inline void sort(uint32_t arr[], size_t size) noexcept {
@@ -103,12 +171,34 @@ namespace MagyarSort {
 
         // Calculate occurences of digits
         countOccurences(arr, size, radics);
-        debugIt(radics);
+        //debugRadics(radics);
 
 
         // Calculate prefix sums
         calcPrefixSums(radics);
-        debugIt(radics);
+        //debugRadics(radics);
+
+        /* Regular (old) radix sort with small twist */
+
+        // Regular radix sort - I just changed occurrence counting and prefix summing to have more ILP
+        // But because my approach does not use that, I want to keep this version in a branch for a
+        // regular radix sort using better ILP just to see how it is doing if I wrote those "Magic"
+        // above already anyways...
+
+        // Regular radix sort needs a copy, see: https://www.youtube.com/watch?v=ujb2CIWE8zY
+        std::vector<uint32_t> arc(size); // NOTE: sort() is noexcept, so an allocation failure here terminates
+
+        uint32_t *from = arr;
+        uint32_t *to = arc.data(); // data() is valid for size == 0, unlike &arc[0]
+
+        RadixMagic<DIGITS - 1>(radics, from, to, size);
+
+        // With another API we could spare this copy if we can delete original arr and return ptr or something...
+        // I am fine with this... this is not my main idea anyways, just little ILP tweak to regular radix sort
+        //if(to != arr) { // <- logically, but bad they are already swapped here!!! BEWARE
+        if(from != arr) { // <- in reality this is what we want because of last swap happened anyways!
+            memcpy(arr, from, size * sizeof(uint32_t)); // memcpy counts bytes, not elements
+        }
     }
 };
 
diff --git a/makefile b/makefile
index 8ff2887..c8b24a0 100644
--- a/makefile
+++ b/makefile
@@ -2,7 +2,7 @@ debug: test.cpp magyarsort.h
 	g++ test.cpp -g -std=c++14 -o test.out
 
 release: test.cpp magyarsort.h
-	g++ test.cpp -o -std=c++14 -O2 test.out
+	g++ test.cpp -std=c++14 -O2 -o test.out
 
 clean: test.out
 	rm test.out
diff --git a/test.cpp b/test.cpp
index c302476..84ad84d 100644
--- a/test.cpp
+++ b/test.cpp
@@ -1,15 +1,132 @@
 /* LICENCE: CC3 - look it up, you need to mention me but that is all */
 
+/* CONFIG */
+
+// Uncomment next line to follow Creel: https://www.youtube.com/watch?v=ujb2CIWE8zY
+// #define CREEL // Ignores SORT_WIDTH (uses 16 fixed values) and sets MAGYAR_SORT_NIBBLE!
+#ifdef CREEL
+// Has to be defined before magyarsort.h is included, otherwise the header is compiled in byte mode
+#define MAGYAR_SORT_NIBBLE
+#endif
+
+// Number of input elements to generate - unused when CREEL is defined!
+#define SORT_WIDTH 40000
+// Uncomment this to use nibbles as digits and not bytes - CREEL defines this anyways
+//#define MAGYAR_SORT_NIBBLE
+
+// Uncomment if you want to see output before / after sorts (debugging for example)
+//#define PRINT_OUTPUT
+
+/* Includes */
+
+#include <vector>
 #include <cstdio>
 #include <cstdint>
+#include <cstdlib> // std::rand | rand
+#include <cstring>
+#include <chrono>
+#include <algorithm> // std::sort
 #include "magyarsort.h"
 
+/* Input generation and prerequisites */
+
+#ifdef CREEL
+#define MAGYAR_SORT_NIBBLE // NOTE(review): this definition comes after the magyarsort.h include, so it cannot affect the header
+#define PRINT_OUTPUT
+static inline std::vector<uint32_t> GenerateInput() {
+    static constexpr uint32_t CreelHex[16] = {
+        // Homage to https://www.youtube.com/watch?v=ujb2CIWE8zY haha
+        // When doing nibbles these are visible all throughout all the
+        // steps and these will be easily readable in debugger in hex!
+        0x277,
+        0x806,
+        0x681,
+        0x462,
+        0x787,
+        0x163,
+        0x284,
+        0x166,
+        0x905,
+        0x518,
+        0x263,
+        0x395,
+        0x988,
+        0x307,
+        0x779,
+        0x721
+    };
+
+    std::vector<uint32_t> ret;
+    ret.resize(16);
+
+    memcpy(ret.data(), CreelHex, sizeof(CreelHex));
+
+    return ret;
+}
+#else
+// Randomized values, no overrides
+static inline std::vector<uint32_t> GenerateInput() {
+    std::vector<uint32_t> ret;
+    ret.resize(SORT_WIDTH);
+
+    for(size_t ek = 0; ek < SORT_WIDTH; ++ek) {
+        ret[ek] = static_cast<uint32_t>(std::rand());
+    }
+
+    return ret;
+}
+#endif
+
+/* Test entry point */
+
 int main() {
-    uint32_t smallArr[16] = { 0xFF, 0xFFFFFFFF, 0xAA000000, 10, 20, 200, 1234513, 1, 0, 65535, 1024*1024, 1026*16, 7, 8, 1, 0};
+    /* Input */
+    std::vector<uint32_t> in1 = GenerateInput();
+    std::vector<uint32_t> in2 = in1; // copy that std::sort works on, used as the reference result
 
-    MagyarSort::sort(smallArr, 16);
+    uint32_t *arr1 = in1.data();
 
-    // TODO: check, etc.
+#ifdef PRINT_OUTPUT
+    printf("Inp: ");
+    MagyarSort::debugArr(arr1, in1.size());
+#endif // PRINT_OUTPUT
+
+    /* Our sort */
+    auto ourBegin = std::chrono::high_resolution_clock::now();
+    MagyarSort::sort(arr1, in1.size());
+    auto ourEnd = std::chrono::high_resolution_clock::now();
+
+#ifdef PRINT_OUTPUT
+    printf("Our: ");
+    MagyarSort::debugArr(arr1, in1.size());
+#endif // PRINT_OUTPUT
+
+    /* std::sort */
+    auto stdBegin = std::chrono::high_resolution_clock::now();
+    std::sort(std::begin(in2), std::end(in2));
+    auto stdEnd = std::chrono::high_resolution_clock::now();
+
+#ifdef PRINT_OUTPUT
+    printf("std: ");
+    MagyarSort::debugArr(in2.data(), in2.size());
+#endif // PRINT_OUTPUT
+
+    /* Check against std - the real test */
+
+    bool good = true;
+    for(size_t i = 0; good && (i < in1.size()); ++i) {
+        good &= (in1[i] == in2[i]);
+    }
+
+    printf("Results:\n\n");
+    printf("- Sorted %zu elements\n", in1.size());
+    if(good) printf("- Same result as std::sort!\n");
+    else printf("- Differs from std::sort! Error!\n");
+    printf("\n");
+    auto stdElapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(stdEnd - stdBegin);
+    auto ourElapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(ourEnd - ourBegin);
+    printf("Time (std sort): %.3f ms.\n", stdElapsed.count() * 1e-6); // nanoseconds -> milliseconds
+    printf("Time (our sort): %.3f ms.\n", ourElapsed.count() * 1e-6);
 
     return 0;
 }