minor optimization on mormord sort

This commit is contained in:
Richard Thier 2024-04-11 16:59:09 +02:00
parent b2d700f127
commit 02bad1f59f

View File

@ -16,7 +16,7 @@
#include "gptsort.h"
#include "thiersort.h"
#define MAGYAR_SORT_DEFAULT_REUSE
// #define MAGYAR_SORT_DEFAULT_REUSE
#include "magyarsort.h"
#include "space_partitioning_sort/spsort.h"
@ -150,13 +150,23 @@ static inline uint32_t morgrab(uint32_t elem, uint32_t j) noexcept {
static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
/* Preparation */
uint32_t radics[256] = {0};
uint32_t radics2[256] = {0};
/* [from, to) index: only where prefix sums change - usually nonfull */
uint32_t real_radics[256 * 2] = {0};
/* Occurrence counting O(n) */
/* TODO: We can go both down and upwards here to increase ILP or even do SSE2 */
for(uint32_t i = 0; i < n; ++i) {
++radics[morgrab(a[i], j)];
/* We can go both down and upwards here to increase ILP or even do SSE2 */
uint32_t k1 = 0;
uint32_t k2 = (n - 1);
for(k1 = 0; k1 < k2; ++k1, --k2) {
++radics[morgrab(a[k1], j)];
++radics2[morgrab(a[k2], j)];
}
if(k1 == k2) {
++radics[morgrab(a[k1], j)];
}
for(int i = 0; i < 256; ++i) {
radics[i] += radics2[i];
}
/* Prefix sum + real radics calc O(256) */