mormord sort - working version, slow on random input for me

This commit is contained in:
Richard Thier 2024-04-11 16:41:08 +02:00
parent 55583bcb4a
commit b2d700f127

View File

@ -33,16 +33,14 @@ void measure(const std::string &inputtype, const std::string &name,
worst[name] = std::max(worst[name], seconds);
}
std::vector<std::string> inputtypes = {
/*
"constant"
"asc"
"desc"
"ascasc"
"constant",
"asc",
"desc",
"ascasc",
"ascdesc",
"descasc"
"descdesc"
"descasc",
"descdesc",
"rand",
*/
"smallrange",
};
std::vector<uint32_t> geninput(const std::string &type, int n) {
@ -150,34 +148,54 @@ static inline uint32_t morgrab(uint32_t elem, uint32_t j) noexcept {
return (elem >> (8 * j)) & 0xff;
}
static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
// Occurrence count
uint32_t prefix[256] = { 0 };
uint32_t index[256] = { 0 };
/* Preparation */
uint32_t radics[256] = {0};
/* [from, to) index pairs, stored only for radix values that occur (i.e. where the prefix sum changes) - usually not all slots are used */
uint32_t real_radics[256 * 2] = {0};
/* Occurrence counting O(n) */
/* TODO: We can go both down and upwards here to increase ILP or even do SSE2 */
for(uint32_t i = 0; i < n; ++i) {
// ++prefix[(a[i] >> (8 * j)) && 0xff];
++prefix[morgrab(a[i], j)];
++radics[morgrab(a[i], j)];
}
// Prefix sum
index[0] = prefix[0];
for(uint32_t i = 1; i < 256; ++i) {
prefix[i] += prefix[i - 1];
index[i] = prefix[i];
/* Prefix sum + real radics calc O(256) */
/* Radics (example): */
/* counts (before prefix sum): {10, 20, 10, 0, 5, 15,...} */
/* ends (after prefix sum): {10, 30, 40, 40, 45, 60,..} */
/* Real radics - half-open [from, to) ranges: */
/* {[0, 10), [10, 30), [30, 40), [40, 45), [45, 60)} */
/* 0. 1. 2. 4. 5. (radix value of each range) */
/* (no range is stored for radix value 3 because it is not found in input) */
uint32_t prev = 0;
uint32_t reali = 0;
for(int i = 0; i < 256; ++i) {
if(radics[i] != 0) {
radics[i] += prev;
real_radics[reali] = prev;
real_radics[reali + 1] = radics[i];
prev = radics[i];
reali += 2;
} else {
radics[i] += prev;
prev = radics[i];
}
}
// Inplace swap
uint32_t pivoti = 0;
while(pivoti < n) {
uint32_t radixval = morgrab(a[pivoti], j);
uint32_t targeti = index[radixval] - 1;
uint32_t targeti = radics[radixval] - 1;
if(targeti > pivoti) {
// swap
uint32_t tmp = a[pivoti];
a[pivoti] = a[targeti];
a[targeti] = tmp;
// dec index
--index[radixval];
--radics[radixval];
} else {
// progress pivot
++pivoti;
}
}
@ -186,11 +204,13 @@ static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
if(j == 0) return;
// Recursion
for(uint32_t i = 0; i < 256; ++i) {
uint32_t from = index[i];
uint32_t to = prefix[i];
if(from != to) {
mormord_sort_impl(&a[from - 1], (to - (from - 1)), j - 1);
for(int i = 0; i < reali; i += 2) {
/* inclusive */
uint32_t from = real_radics[i];
/* non-inclusive */
uint32_t to = real_radics[i + 1];
if(from < to) { // TODO: check if this "if" is needed!
mormord_sort_impl(&a[from], (to - (from)), j - 1);
}
}
}
@ -595,12 +615,13 @@ void measure_single(int n) {
int main(void) {
//int n = 100000000;
//int n = 10000000;
int n = 10000000;
//int n = 1000000;
//int n = 100000;
//int n = 10000;
//int n = 1000;
//int n = 100;
int n = 10;
//int n = 10;
printf("Sorting %d elements:\n\n", n);
@ -610,7 +631,7 @@ int main(void) {
for (auto inputtype : inputtypes) {
printf("%10s", inputtype.c_str());
fflush(stdout);
// fflush(stdout); // XXX: FIXME?
std::vector<uint32_t> v(n), w(n), expected(n);
v = geninput(inputtype, n);
measure(inputtype, "copy", [&] { w = v; });
@ -626,11 +647,9 @@ int main(void) {
w.swap(buf);
}
});
/*
w = v;
measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&w[0], w.size()); });
assert(w == expected);
*/
w = v;
measure(inputtype, "mormord", [&] { mormord_sort(&w[0], w.size()); });
assert(w == expected);