mormord sort - working version, slow on random input for me

This commit is contained in:
Richard Thier 2024-04-11 16:41:08 +02:00
parent 55583bcb4a
commit b2d700f127

View File

@ -33,16 +33,14 @@ void measure(const std::string &inputtype, const std::string &name,
worst[name] = std::max(worst[name], seconds); worst[name] = std::max(worst[name], seconds);
} }
std::vector<std::string> inputtypes = { std::vector<std::string> inputtypes = {
/* "constant",
"constant" "asc",
"asc" "desc",
"desc" "ascasc",
"ascasc"
"ascdesc", "ascdesc",
"descasc" "descasc",
"descdesc" "descdesc",
"rand", "rand",
*/
"smallrange", "smallrange",
}; };
std::vector<uint32_t> geninput(const std::string &type, int n) { std::vector<uint32_t> geninput(const std::string &type, int n) {
@ -150,34 +148,54 @@ static inline uint32_t morgrab(uint32_t elem, uint32_t j) noexcept {
return (elem >> (8 * j)) & 0xff; return (elem >> (8 * j)) & 0xff;
} }
static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept { static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
// Occurence count /* Preparation */
uint32_t prefix[256] = { 0 }; uint32_t radics[256] = {0};
uint32_t index[256] = { 0 }; /* [from, to) index: only where prefix sums change - usually nonfull */
uint32_t real_radics[256 * 2] = {0};
/* Occurrence counting O(n) */
/* TODO: We can go both down and upwards here to increase ILP or even do SSE2 */
for(uint32_t i = 0; i < n; ++i) { for(uint32_t i = 0; i < n; ++i) {
// ++prefix[(a[i] >> (8 * j)) && 0xff]; ++radics[morgrab(a[i], j)];
++prefix[morgrab(a[i], j)];
} }
// Prefix sum /* Prefix sum + real radics calc O(256) */
index[0] = prefix[0]; /* Radics: */
for(uint32_t i = 1; i < 256; ++i) { /* fr: {10, 20, 10, 0, 5, 15,...} */
prefix[i] += prefix[i - 1]; /* to: {10, 30, 40, 40, 45, 60,..} */
index[i] = prefix[i]; /* Real radics: */
/* [from, to): {[0, 10), [10, 30), [30, 40), [40, 45), [45, 60)} */
/* 0. 1. 2. 4. 5. */
/* (because radix value 3 is not found in input) */
uint32_t prev = 0;
uint32_t reali = 0;
for(int i = 0; i < 256; ++i) {
if(radics[i] != 0) {
radics[i] += prev;
real_radics[reali] = prev;
real_radics[reali + 1] = radics[i];
prev = radics[i];
reali += 2;
} else {
radics[i] += prev;
prev = radics[i];
}
} }
// Inplace swap // Inplace swap
uint32_t pivoti = 0; uint32_t pivoti = 0;
while(pivoti < n) { while(pivoti < n) {
uint32_t radixval = morgrab(a[pivoti], j); uint32_t radixval = morgrab(a[pivoti], j);
uint32_t targeti = index[radixval] - 1; uint32_t targeti = radics[radixval] - 1;
if(targeti > pivoti) { if(targeti > pivoti) {
// swap // swap
uint32_t tmp = a[pivoti]; uint32_t tmp = a[pivoti];
a[pivoti] = a[targeti]; a[pivoti] = a[targeti];
a[targeti] = tmp; a[targeti] = tmp;
// dec index // dec index
--index[radixval]; --radics[radixval];
} else { } else {
// progress pivot
++pivoti; ++pivoti;
} }
} }
@ -186,11 +204,13 @@ static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
if(j == 0) return; if(j == 0) return;
// Recursion // Recursion
for(uint32_t i = 0; i < 256; ++i) { for(int i = 0; i < reali; i += 2) {
uint32_t from = index[i]; /* inclusive */
uint32_t to = prefix[i]; uint32_t from = real_radics[i];
if(from != to) { /* non-inclusive */
mormord_sort_impl(&a[from - 1], (to - (from - 1)), j - 1); uint32_t to = real_radics[i + 1];
if(from < to) { // TODO: check if this "if" is needed!
mormord_sort_impl(&a[from], (to - (from)), j - 1);
} }
} }
} }
@ -595,12 +615,13 @@ void measure_single(int n) {
int main(void) { int main(void) {
//int n = 100000000; //int n = 100000000;
//int n = 10000000; int n = 10000000;
//int n = 1000000; //int n = 1000000;
//int n = 100000; //int n = 100000;
//int n = 10000; //int n = 10000;
//int n = 1000;
//int n = 100; //int n = 100;
int n = 10; //int n = 10;
printf("Sorting %d elements:\n\n", n); printf("Sorting %d elements:\n\n", n);
@ -610,7 +631,7 @@ int main(void) {
for (auto inputtype : inputtypes) { for (auto inputtype : inputtypes) {
printf("%10s", inputtype.c_str()); printf("%10s", inputtype.c_str());
fflush(stdout); // fflush(stdout); // XXX: FIXME?
std::vector<uint32_t> v(n), w(n), expected(n); std::vector<uint32_t> v(n), w(n), expected(n);
v = geninput(inputtype, n); v = geninput(inputtype, n);
measure(inputtype, "copy", [&] { w = v; }); measure(inputtype, "copy", [&] { w = v; });
@ -626,11 +647,9 @@ int main(void) {
w.swap(buf); w.swap(buf);
} }
}); });
/*
w = v; w = v;
measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&w[0], w.size()); }); measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&w[0], w.size()); });
assert(w == expected); assert(w == expected);
*/
w = v; w = v;
measure(inputtype, "mormord", [&] { mormord_sort(&w[0], w.size()); }); measure(inputtype, "mormord", [&] { mormord_sort(&w[0], w.size()); });
assert(w == expected); assert(w == expected);