minor optimization on mormord sort

2024-04-11 16:59:09 +02:00 · 2024-04-11 16:59:09 +02:00 · 02bad1f59f
commit 02bad1f59f
parent b2d700f127
1 changed file with 14 additions and 4 deletions
--- a/ypsu.cpp
+++ b/ypsu.cpp
@ -16,7 +16,7 @@
 #include "gptsort.h"
 #include "thiersort.h"

-#define MAGYAR_SORT_DEFAULT_REUSE
+// #define MAGYAR_SORT_DEFAULT_REUSE
 #include "magyarsort.h"

 #include "space_partitioning_sort/spsort.h"
@ -150,13 +150,23 @@ static inline uint32_t morgrab(uint32_t elem, uint32_t j) noexcept {
 static inline void mormord_sort_impl(uint32_t *a, int n, int j) noexcept {
 	/* Preparation */
 	uint32_t radics[256] = {0};
+	uint32_t radics2[256] = {0};
 	/* [from, to) index: only where prefix sums change - usually nonfull */
 	uint32_t real_radics[256 * 2] = {0};

 	/* Occurrence counting O(n) */
-	/* TODO: We can go both down and upwards here to increase ILP or even do SSE2 */
-	for(uint32_t i = 0; i < n; ++i) {
-		++radics[morgrab(a[i], j)];
+	/* Count from both ends at once to increase ILP; SSE2 remains a possible next step */
+	uint32_t k1 = 0;
+	uint32_t k2 = (n - 1);
+	for(k1 = 0; k1 < k2; ++k1, --k2) {
+		++radics[morgrab(a[k1], j)];
+		++radics2[morgrab(a[k2], j)];
+	}
+	if(k1 == k2) {
+		++radics[morgrab(a[k1], j)];
+	}
+	for(int i = 0; i < 256; ++i) {
+		radics[i] += radics2[i];
 	}

 	/* Prefix sum + real radics calc O(256) */