diff --git a/ypsu.cpp b/ypsu.cpp index 74b3838..6428b48 100644 --- a/ypsu.cpp +++ b/ypsu.cpp @@ -639,6 +639,15 @@ void fourrots(uint32_t *arr, int n) { } // frewr - four rewrites. +// Remark: I realized this changes the keys (rewrites them) by rotating them by 8 bits in each pass. +// So in pass 0 we just & 0xff and put the value in place, but the stored v gets rotated right by 8! +// So in pass 1 we can just & 0xff again - and store the rotated value again. +// Because there are 4 rotations and 4 radices, in the end the "real" values "come back" as keys. +// Currently (as of October 1, 2025) this is the fastest, but I KNOW this was (slightly) slower than Magyarsort in December 2021. +// Interestingly, if I build that old version now, it's already faster than my Magyarsort (and it lacks some later optimizations). +// So I imagine this is because compiler technology has improved and favors this algorithm more now! +// This algorithm was made by Ypsu - my friend. He originally made it as a response to Magyarsort... +// I added and measured unrolls and some minor optimizations - but without understanding the algorithm back then... void frewr(uint32_t *arr, int n) { uint32_t *tmpbuf = (uint32_t *)malloc(n * 4); mlock(tmpbuf, n * 4);