unlikely optimization in thiersort + measurements

This commit is contained in:
Richard Thier 2025-09-12 02:25:57 +02:00
parent 30e868d154
commit 5fc08c6fae
2 changed files with 22 additions and 6 deletions

View File

@ -4,6 +4,21 @@
#include "qsort/schwab_sort.h"
/* A non-in-place, tricky float-hack based bucket sort variant. Uses schwabsort! */
/*
 * Compiler-portability helpers.
 *
 *   KM_PREFETCH(x)    - cache prefetch hint for address x (no-op on MSVC).
 *   LIKELY(x)         - yields the truth value of x; hints that it is usually true.
 *   UNLIKELY(x)       - yields the truth value of x; hints that it is usually false.
 *   KM_NOINLINE       - function attribute: never inline.
 *   KM_ALWAYS_INLINE  - function attribute: force inlining.
 */
#ifdef _MSC_VER
/* MSVC has neither __builtin_prefetch nor __builtin_expect: the hints degrade to no-ops. */
#define KM_PREFETCH(x) ((void)0)
/* These must still expand to the condition itself: an empty expansion would turn
 * `if(UNLIKELY(cond))` into `if()`, which does not compile. */
#define LIKELY(x) (!!(x))
#define UNLIKELY(x) (!!(x))
#define KM_NOINLINE __declspec(noinline)
#define KM_ALWAYS_INLINE __forceinline
#else
#define KM_PREFETCH(x) __builtin_prefetch(x)
/* __builtin_expect works on `long`; `!!(x)` first normalizes the condition to 0/1 so
 * pointer conditions are accepted and wide integers cannot be truncated to zero. */
#define LIKELY(x) __builtin_expect(!!(x),1)
#define UNLIKELY(x) __builtin_expect(!!(x),0)
#define KM_NOINLINE __attribute__ ((noinline))
#define KM_ALWAYS_INLINE __attribute__ ((always_inline))
#endif
/* Float and unsigned32 reinterpreter */
union th2_fu {
float f;
@ -103,7 +118,7 @@ static inline void thiersort2(uint32_t *arr, uint32_t *temparr, int n, sch_rand_
uint32_t pivot = temparr[i];
#pragma GCC unroll 4
for(int j = begin + 1; j < end; ++j) {
if(temparr[j] == pivot) {
if(UNLIKELY(temparr[j] == pivot)) {
/* swap to front partition */
++i;
uint32_t tmp = temparr[i];

View File

@ -842,7 +842,8 @@ void measure_single(int n) {
std::vector<uint32_t> v(n);
v = geninput(inputtype, n);
//measure(inputtype, "sp", [&] { spsort(&v[0], v.size()); });
measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&v[0], v.size()); });
//measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&v[0], v.size()); });
measure(inputtype, "thier2", [&] { do_thier2(&v[0], v.size()); });
for (auto r : results) printf("%9.3fs", r.second);
puts("");
@ -858,9 +859,9 @@ void measure_single(int n) {
int main(void) {
//int n = 100000000;
//int n = 10000000;
int n = 10000000;
//int n = 5000000;
int n = 1000000;
//int n = 1000000;
//int n = 100000;
//int n = 20001;
//int n = 20000;
@ -874,8 +875,8 @@ int main(void) {
printf("Sorting %d elements:\n\n", n);
// Uncomment this for profiling and alg!
//measure_single(n);
//return 0;
measure_single(n);
return 0;
for (auto inputtype : inputtypes) {
printf("%10s", inputtype.c_str());