added pre-randomized sorts (not so great so far - probably too many cache misses)

This commit is contained in:
Richard Thier 2025-10-01 16:49:00 +02:00
parent d43b55f065
commit 7d407000fe
5 changed files with 85 additions and 5 deletions

3
.gitmodules vendored
View File

@ -4,3 +4,6 @@
[submodule "FlameGraph"]
path = FlameGraph
url = https://github.com/brendangregg/FlameGraph
[submodule "fastrand"]
path = fastrand
url = ssh://gitea@magosit.hu:8122/prenex/fastrand.git

View File

@ -1,6 +1,9 @@
#!/bin/sh
# Records user-space L1 data-cache load misses of ./ypsu.out with perf and
# renders the collected call stacks as a flame graph (perf.svg).
# NOTE(review): two `perf record` runs are active below and each one is
# preceded by `rm perf.data`, so only the second run's data reaches
# `perf script` - confirm whether the first run is still wanted.
# event-count based sampling (-c 1000), call graphs enabled (-g)
rm perf.data; perf record -e L1-dcache-load-misses:u -c 1000 -g -- ./ypsu.out
# time based sampling
rm perf.data; perf record -e L1-dcache-load-misses:u -F 99 -g -- ./ypsu.out
# counters only - might work or not
#rm perf.data; perf record -e L1-dcache-load-misses:u -c 1000 -g -- ./ypsu.out
# fold the recorded stacks into the one-line-per-stack format flamegraph.pl reads
perf script | FlameGraph/stackcollapse-perf.pl > out.perf-folded
# render the folded stacks into an SVG
FlameGraph/flamegraph.pl out.perf-folded > perf.svg
# open the rendered SVG with `brave`
brave perf.svg

1
fastrand Submodule

@ -0,0 +1 @@
Subproject commit 42943f467831bfcc123214659f4b5b7f5a5e2c54

47
randominus.h Normal file
View File

@ -0,0 +1,47 @@
#ifndef RANDOMINUS_H
#define RANDOMINUS_H
/* To randomize an array - hopefully as fast as possible */
#include "fastrand/fastrand.h"
/** Exchanges the two uint32_t values that `a` and `b` point to (a == b is a no-op). */
static void inline rd_swap(uint32_t *a, uint32_t *b) {
	const uint32_t held = *b;
	*b = *a;
	*a = held;
}
/**
 * Scrambles the array in place by swapping elements with pseudo-randomly picked positions.
 * This is by no means "cryptographically correct" or stuff, but fast.
 *
 * @param a The array whose elements get swapped around in place.
 * @param n Element count of `a`; also passed as the bound to rand_between() and fastmodlike().
 * @param seed Not read anywhere in the body at the moment.
 *
 * NOTE(review): `rsi` is handed to lcg_ilp() without any initialization visible here, while
 * `ilp_seeds` is filled but never read - presumably the seeds were meant to initialize `rsi`;
 * confirm against the fastrand API.
 * NOTE(review): the loop only runs while i < n - 8, so the trailing (up to 8) positions are never
 * used as swap sources, and for n <= 8 no swap happens at all.
 */
static inline void randominus(uint32_t *a, int n, uint32_t seed) {
/** Seeds meant for the ILP random generator - one per lane (currently only written, never read) */
uint32_t ilp_seeds[8];
rand_ilp_state rsi;
rand_state rs = init_rand();
for(int i = 0; i < 8; ++i) {
uint32_t choice = rand_between(&rs, 0, n);
ilp_seeds[i] = choice;
}
/** Go over the array and randomly swap stuff - hand unrolled with ILP random get! */
for(int i = 0; i < (n - 8); i += 8) {
/* Eight swap targets, one per generator lane A..H (presumably fastmodlike() maps into [0, n) - TODO confirm) */
uint32_t to0 = fastmodlike(lcg_ilp(&rsi, A), n);
uint32_t to1 = fastmodlike(lcg_ilp(&rsi, B), n);
uint32_t to2 = fastmodlike(lcg_ilp(&rsi, C), n);
uint32_t to3 = fastmodlike(lcg_ilp(&rsi, D), n);
uint32_t to4 = fastmodlike(lcg_ilp(&rsi, E), n);
uint32_t to5 = fastmodlike(lcg_ilp(&rsi, F), n);
uint32_t to6 = fastmodlike(lcg_ilp(&rsi, G), n);
uint32_t to7 = fastmodlike(lcg_ilp(&rsi, H), n);
/* Swap the eight consecutive elements a[i..i+7] with their picked targets */
rd_swap(&a[i], &a[to0]);
rd_swap(&a[i + 1], &a[to1]);
rd_swap(&a[i + 2], &a[to2]);
rd_swap(&a[i + 3], &a[to3]);
rd_swap(&a[i + 4], &a[to4]);
rd_swap(&a[i + 5], &a[to5]);
rd_swap(&a[i + 6], &a[to6]);
rd_swap(&a[i + 7], &a[to7]);
}
}
#endif /* RANDOMINUS_H */

View File

@ -23,6 +23,7 @@
#include "qsort/chatgpt_qs.h"
#include "threepass.h"
#include "thiersort3.h"
#include "randominus.h"
// #define MAGYAR_SORT_DEFAULT_REUSE
#include "magyarsort.h"
@ -227,6 +228,15 @@ static inline void do_thier3(uint32_t *a, int n) noexcept {
thiersort3(a, &(tmp[0]), n);
}
/**
 * rthier: pre-randomizes the input with randominus() and then sorts it with thiersort3().
 *
 * @param a The array to sort in place.
 * @param n Element count of `a`.
 */
static inline void do_rthier(uint32_t *a, int n) noexcept {
	assert(n * uint32_t(sizeof(a[0])) <= INT_MAX);
	// Passing an uninitialized value would read an indeterminate value (undefined behaviour),
	// so hand over a defined seed.
	const uint32_t seed = 0;
	randominus(a, n, seed);
	// Scratch buffer for thiersort3; data() stays valid even for n == 0, unlike &tmp[0].
	std::vector<uint32_t> tmp(n);
	thiersort3(a, tmp.data(), n);
}
/** 3+1 pass bottom-up radix */
static inline void do_threepass(uint32_t *a, int n) noexcept {
threepass(a, n);
@ -921,7 +931,17 @@ int main(int argc, char **argv) {
});
w = v;
measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&w[0], w.size()); });
measure(inputtype, "magyar", [&] {
MagyarSort::sort<uint32_t>(&w[0], w.size());
});
assert(w == expected);
w = v;
measure(inputtype, "rmagyar", [&] {
uint32_t junk;
randominus(&w[0], w.size(), junk);
MagyarSort::sort<uint32_t>(&w[0], w.size());
});
assert(w == expected);
w = v;
@ -1011,6 +1031,10 @@ int main(int argc, char **argv) {
measure(inputtype, "thier3", [&] { do_thier3(&w[0], w.size()); });
assert(w == expected);
w = v;
measure(inputtype, "rthier", [&] { do_rthier(&w[0], w.size()); });
assert(w == expected);
w = v;
measure(inputtype, "threep", [&] { do_threepass(&w[0], w.size()); });
assert(w == expected);
@ -1046,10 +1070,12 @@ int main(int argc, char **argv) {
}
assert(w == expected);
*/
/*
w = v;
measure(inputtype, "frewr", [&] { frewr(&w[0], w.size()); });
assert(w == expected);
/*
w = v;
measure(inputtype, "vsort", [&] { vsort(&w[0], w.size()); });
assert(w == expected);