minor optimization on mormord sort
This commit is contained in:
parent
b2d700f127
commit
02bad1f59f
18
ypsu.cpp
18
ypsu.cpp
@ -16,7 +16,7 @@
|
||||
#include "gptsort.h"
|
||||
#include "thiersort.h"
|
||||
|
||||
#define MAGYAR_SORT_DEFAULT_REUSE
|
||||
// #define MAGYAR_SORT_DEFAULT_REUSE
|
||||
#include "magyarsort.h"
|
||||
|
||||
#include "space_partitioning_sort/spsort.h"
|
||||
@ -150,13 +150,23 @@ static inline uint32_t morgrab(uint32_t elem, uint32_t j) noexcept {
|
||||
static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
|
||||
/* Preparation */
|
||||
uint32_t radics[256] = {0};
|
||||
uint32_t radics2[256] = {0};
|
||||
/* [from, to) index: only where prefix sums change - usually nonfull */
|
||||
uint32_t real_radics[256 * 2] = {0};
|
||||
|
||||
/* Occurrence counting O(n) */
|
||||
/* TODO: We can go both down and upwards here to increase ILP or even do SSE2 */
|
||||
for(uint32_t i = 0; i < n; ++i) {
|
||||
++radics[morgrab(a[i], j)];
|
||||
/* We can go both down and upwards here to increase ILP or even do SSE2 */
|
||||
uint32_t k1 = 0;
|
||||
uint32_t k2 = (n - 1);
|
||||
for(k1 = 0; k1 < k2; ++k1, --k2) {
|
||||
++radics[morgrab(a[k1], j)];
|
||||
++radics2[morgrab(a[k2], j)];
|
||||
}
|
||||
if(k1 == k2) {
|
||||
++radics[morgrab(a[k1], j)];
|
||||
}
|
||||
for(int i = 0; i < 256; ++i) {
|
||||
radics[i] += radics2[i];
|
||||
}
|
||||
|
||||
/* Prefix sum + real radics calc O(256) */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user