From edbb59d7a1f55f1bfb1e87563af6f6f6d7e75ea5 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Tue, 16 Aug 2022 15:50:36 +0200 Subject: [PATCH] spsort got twovalue sort special case (no infinite recursion) --- space_partitioning_sort/spsort.h | 79 ++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/space_partitioning_sort/spsort.h b/space_partitioning_sort/spsort.h index 94f4d66..3e6f23c 100644 --- a/space_partitioning_sort/spsort.h +++ b/space_partitioning_sort/spsort.h @@ -63,6 +63,63 @@ void binsertion_sort(uint32_t *a, int n) { } } +/** Specific sort that sorts arrays that can only have two or one different values (but many of those). O(n) strictly linear! */ +void twovalue_sort(uint32_t *t, int n) { + if(n > 0) { + // Prepare for counting + uint32_t v1 = t[0]; + bool has_v2 = false; + uint32_t v2; + int c1 = 0; + int c2 = 0; + int i = 0; + + // Find second value (if there is any) and stop there + while((i < n) && !has_v2) { + has_v2 = (t[i] != v1); + v2 = t[i]; // can be junk, but only used when has_v2 == true! + c1 += !has_v2; + c2 += has_v2; + ++i; + } + + // TODO: This only works for numbers from now on... + // But if we keep all until this point, we can + // do a similar "xchg to the sides" trick here + // by knowing which value is smaller and thus + // code this to work generally if needed! + + // Finish counting loop + while(i < n) { + c1 += (v1 == t[i]); // count v1s + c2 += has_v2 ? (v2 == t[i]) : 0; + ++i; + } + + // We have counts of value variants, just write them back + + // Get which is the smaller value and how many there is? + int sv = has_v2 ? ((v2 < v1) ? v2 : v1): v1; + int sc = has_v2 ? ((v2 < v1) ? c2 : c1): c1; + + // Write out the smaller values + i = 0; + while(i < sc) { + t[i] = sv; + ++i; + } + + // Write out the big values for all remaining places + if(i < n) { + int bv = has_v2 ? ((v2 >= v1) ? v2 : v1): v1; + while(i < n) { + t[i] = bv; + ++i; + } + } + } +} + /** Overflow-safe and generally safe */ inline uint32_t internal_mid(uint32_t low, uint32_t high) { /* Unsigned-ness make this overflow-safe */ @@ -166,7 +223,7 @@ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mi // means we can small-sort both. binsertion_sort(t + left, rights); } else { - // RE-PIVOT!!! + // RE-PIVOT!!! [right] // This tries to help for cases where the // domain of da values have a few outliers // which would cause many unnecessity split. @@ -174,7 +231,23 @@ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mi // This way the midpoint and all totally get // re-evaluated too, which adds an O(n) here // but make us have fewer extra steps hopefully. - spsort(t + left, rights, m); + if(lefts != 0) { + // Regular case (left had some elements at least) + spsort(t + left, rights, m); + } else { + // Extreme case where left was empty! + // This would lead to endless recursion + // so we need to handle this separately! + // + // When left is totally empty that means + // that right bucket can only have at most + // two distict values among its elements or + // less (that is it is either const values + // in a big array OR there are two different + // values in it in any ways). For this we + // implemented a special and optimized sort... + twovalue_sort(t + left, rights); // O(n) + } } } else { // lefts are enough in count @@ -182,7 +255,7 @@ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mi // if rights are too few, we insertion sort them binsertion_sort(t + left, rights); - // RE-PIVOT!!! + // RE-PIVOT!!! [left] // and we also need repivot similar to above! spsort(t, lefts, m); } else {