spsort got twovalue sort special case (no infinite recursion)

This commit is contained in:
Richard Thier 2022-08-16 15:50:36 +02:00
parent 6a0a2540bb
commit edbb59d7a1

View File

@ -63,6 +63,63 @@ void binsertion_sort(uint32_t *a, int n) {
}
}
/**
 * Sorts (ascending) an array that contains at most two distinct values.
 * Strictly linear: one counting pass followed by one write-back pass.
 *
 * The elements are not moved around; instead the occurrences of the two
 * values are counted and the array is rewritten as a run of the smaller
 * value followed by a run of the bigger one.
 *
 * Precondition: t[0..n) holds one or two distinct values. Elements that
 * match neither the first value nor the first differing value are NOT
 * preserved - their slots are overwritten with the bigger value.
 *
 * @param t Array to sort in place. Not accessed when n <= 0.
 * @param n Number of elements in t. Values <= 0 make this a no-op.
 */
void twovalue_sort(uint32_t *t, int n) {
	if(n <= 0) {
		return;
	}

	// v1 is the first element; v2 becomes the first element differing
	// from v1 (it stays equal to v1 when the array is constant).
	const uint32_t v1 = t[0];
	uint32_t v2 = v1;
	bool has_v2 = false;
	int c1 = 0; // occurrences of v1
	int c2 = 0; // occurrences of v2
	int i = 0;

	// Count the leading run of v1s and stop right after the first
	// element that differs (that element is already counted in c2).
	while((i < n) && !has_v2) {
		v2 = t[i];
		has_v2 = (v2 != v1);
		c1 += !has_v2;
		c2 += has_v2;
		++i;
	}

	// Finish counting. This loop only runs when a second value was
	// found, so v2 is a real, distinct value here.
	// TODO: This only works for plain numbers. To support general
	//       elements, a similar "xchg to the sides" trick could be
	//       used once we know which of the two values is smaller.
	while(i < n) {
		c1 += (t[i] == v1);
		c2 += (t[i] == v2);
		++i;
	}

	// Decide which value goes first and how many of it there are.
	// These stay uint32_t so that values above INT_MAX never pass
	// through a signed type on their way back into the array.
	const bool v2_is_smaller = has_v2 && (v2 < v1);
	const uint32_t small_value = v2_is_smaller ? v2 : v1;
	const uint32_t big_value = v2_is_smaller ? v1 : v2;
	const int small_count = v2_is_smaller ? c2 : c1;

	// Write out the smaller values...
	for(i = 0; i < small_count; ++i) {
		t[i] = small_value;
	}
	// ...and fill all remaining places with the bigger value.
	for(; i < n; ++i) {
		t[i] = big_value;
	}
}
/** Overflow-safe and generally safe */
inline uint32_t internal_mid(uint32_t low, uint32_t high) {
/* Unsigned-ness make this overflow-safe */
@ -166,7 +223,7 @@ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mi
// means we can small-sort both.
binsertion_sort(t + left, rights);
} else {
// RE-PIVOT!!!
// RE-PIVOT!!! [right]
// This tries to help in cases where the
// domain of the values has a few outliers
// which would cause many unnecessary splits.
@ -174,7 +231,23 @@ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mi
// This way the midpoint and everything else
// gets re-evaluated too, which adds an O(n) here
// but hopefully leaves us with fewer extra steps.
spsort(t + left, rights, m);
if(lefts != 0) {
// Regular case (left had some elements at least)
spsort(t + left, rights, m);
} else {
// Extreme case where left was empty!
// This would lead to endless recursion
// so we need to handle this separately!
//
// When left is totally empty that means
// that the right bucket can have at most
// two distinct values among its elements
// (that is, it is either one constant value
// in a big array OR there are exactly two
// different values in it). For this we
// implemented a special and optimized sort...
twovalue_sort(t + left, rights); // O(n)
}
}
} else {
// lefts are enough in count
@ -182,7 +255,7 @@ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mi
// if rights are too few, we insertion sort them
binsertion_sort(t + left, rights);
// RE-PIVOT!!!
// RE-PIVOT!!! [left]
// and we also need repivot similar to above!
spsort(t, lefts, m);
} else {