#ifndef SPSORT_H #define SPSORT_H /* * Sorting algorithm(s) that basically space-partition the domain of the sort. * * These all do some (usually linear or even const) operation on the input that * "measures" the inputs domain. That is which (kind of) number ranges appear. * Then we know more about the distribution of the elements when it comes to * their value and thus can create "buckets" and partition in the space of the * values in ways that separate them the most. * * Some of these algorithms are simple, some have dynamic data structures for * bucketing similar to octrees in graphics (which served as main idea). * * We only implement some of these, but many variants are possible. If you * only find a single implementation, that is fine too - also we might move * this text to a markdown readme and separate algorithms into different files. */ #include #include // FIXME: vector is for debugging #include /** * A binary search based function * to find the position * where item should be inserted * in a[low..high] */ int binary_search(uint32_t *a, int item, int low, int high) { if (high <= low) { return (item > a[low]) ? (low + 1) : low; } int mid = (low + high) / 2; if(item == a[mid]) return mid + 1; if(item > a[mid]) return binary_search(a, item, mid + 1, high); return binary_search(a, item, low, mid - 1); } /** Binary insertion sort */ void binsertion_sort(uint32_t *a, int n) { uint32_t selected; for (int i = 1; i < n; ++i) { int j = i - 1; selected = a[i]; // find location where selected should be inseretd int loc = binary_search(a, selected, 0, j); // Move all elements after location to create space while (j >= loc) { a[j + 1] = a[j]; j--; } a[j + 1] = selected; } } /** Specific sort that sorts arrays that can only have two or one different values (but many of those). O(n) strictly linear! */ void twovalue_sort(uint32_t *t, int n) { if(n > 0) { // Prepare for counting uint32_t v1 = t[0]; bool has_v2 = false; uint32_t v2; int c1 = 0; int c2 = 0; int i = 0; // Find second value (if there is any) and stop there while((i < n) && !has_v2) { has_v2 = (t[i] != v1); v2 = t[i]; // can be junk, but only used when has_v2 == true! c1 += !has_v2; c2 += has_v2; ++i; } // TODO: This only works for numbers from now on... // But if we keep all until this point, we can // do a similar "xchg to the sides" trick here // by knowing which value is smaller and thus // code this to work generally if needed! // Finish counting loop while(i < n) { c1 += (v1 == t[i]); // count v1s c2 += has_v2 ? (v2 == t[i]) : 0; ++i; } // We have counts of value variants, just write them back // Get which is the smaller value and how many there is? int sv = has_v2 ? ((v2 < v1) ? v2 : v1): v1; int sc = has_v2 ? ((v2 < v1) ? c2 : c1): c1; // Write out the smaller values i = 0; while(i < sc) { t[i] = sv; ++i; } // Write out the big values for all remaining places if(i < n) { int bv = has_v2 ? ((v2 >= v1) ? v2 : v1): v1; while(i < n) { t[i] = bv; ++i; } } } } /** Overflow-safe and generally safe */ inline uint32_t internal_mid(uint32_t low, uint32_t high) { /* Unsigned-ness make this overflow-safe */ uint32_t mid = (low + high) >> 1; assert(low <= mid); assert(mid <= high); return mid; } /** Sort using space-partitioning - does recursive re-pivoting */ inline void spsort(uint32_t *t, int n, int m = 32); /** Helper function that puts elements higher then mid to the top of the array and lower to the bottom. Returns number of bottoms */ inline int internal_array_separate(uint32_t *t, int n, uint32_t mid) { if(n > 0) { // Two heads that also read & write (both) int left = 0; int right = n - 1; while(left < right) { // Step over already good positioned values from left while((left < right) && (t[left] < mid)) { ++left; } // Step over already good positioned values from right while((left < right) && (t[right] >= mid)) { --right; } // Extra check needed for edge-case! if(left < right) { // Both in wrong location - xchg them! auto tmp = t[right]; t[right] = t[left]; t[left] = tmp; ++left; --right; } } // Edge-case increment if single elem happens in middle in the end left += ((left == right) && (t[left] < mid)); return left; } return 0; } // ALGO: [spsort] In-place variant that looks like a quicksort-kind-of-thing: // - I realized that in-placing make this similar to quicksort actually // - We check if the thing is small enough for insertion sorting in-place // - If not, we calculate low-mid-high // - Then we separate the array in low and high (relative to mid) by going two pointers left-and-right additions. // - When loop ends, we have two smallers arrays to process. // - Issue is that if the distribution of input is not equal distribution (or close), then // we can end up many on left and none on right for example! // - To solve that issue, we check if a side is below size of "m" bucket count. // - If below, we run insertion sort on that. // - If not below, we recurse totally - including min-max search domain pivoting!!! [re-pivoting] // - If none of the new buckets too small, both go just partial recursion: pivot calculation not happen and // we provide a pivot (mid) being right between our low-mid or mid-high pairs (depending side). /** Helper function for spsort - does not do re-pivoting and knows the domain */ inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mid, uint32_t high) { // No reason why we need to sort 1-big buckets... // Also rules out n == 0 and n == 1 bad cases later if we do API this way. assert(n > 1); // Makes small arrays sort fast otherwise we need to go down m == 2 or such! if(n < m) { binsertion_sort(t, n); return; } auto left = internal_array_separate(t, n, mid); auto lefts = left; auto rights = n - left; // Re-pivot the mid if needed if(lefts < m) { // Left can be simple-sorted binsertion_sort(t, left); // Check the right now if(rights < m) { // Balance in both small-extremal // means we can small-sort both. binsertion_sort(t + left, rights); } else { // RE-PIVOT!!! [right] // This tries to help for cases where the // domain of da values have a few outliers // which would cause many unnecessity split. // // This way the midpoint and all totally get // re-evaluated too, which adds an O(n) here // but make us have fewer extra steps hopefully. if(lefts != 0) { // Regular case (left had some elements at least) spsort(t + left, rights, m); } else { // Extreme case where left was empty! // This would lead to endless recursion // so we need to handle this separately! // // When left is totally empty that means // that right bucket can only have at most // two distict values among its elements or // less (that is it is either const values // in a big array OR there are two different // values in it in any ways). For this we // implemented a special and optimized sort... twovalue_sort(t + left, rights); // O(n) } } } else { // lefts are enough in count if(rights < m) { // if rights are too few, we insertion sort them binsertion_sort(t + left, rights); // RE-PIVOT!!! [left] // and we also need repivot similar to above! spsort(t, lefts, m); } else { // Both are non-extremal cases, which means balance too // but this case we can calculate the mid by guesswork! internal_spsort(t, lefts, m, low, internal_mid(low, mid), mid); internal_spsort(t + left, rights, m, mid, internal_mid(mid, high), high); } } } /** Sort using space-partitioning - does recursive re-pivoting */ inline void spsort(uint32_t *t, int n, int m) { assert(n * int64_t(sizeof(t[0])) <= INT_MAX); if(n > 0) { /* 1.) DOM min-max search. O(n) */ uint32_t low = UINT32_MAX; uint32_t high = 0; for(int i = 0; i < n; ++i) { if(t[i] < low) low = t[i]; if(t[i] > high) high = t[i]; } uint32_t mid = internal_mid(low, high); // pivot /* 2.) Sort with space-partitioning */ internal_spsort(t, n, m, low, mid, high); } } // TODO: [spsort_copy] The algorithm for the non-in-place (copy) variant: // - Basically we take the data and measure the domain (min-max) in O(n) // - We then go over the input again (n times the following): // -- Put the value in the bucket where it belongs (octree-insert-1D operation) // -- If bucket grows too big (more than 'm' elements), then split the bucket contents. // -- Split can be binary OR any kind of n-ary split in the algorithm of ours. // -- An O(m) operation splits into the sub-buckets // -- So far so good, because insertion point finding is O(logn) and this is O(m) which latter is const. // -- The problem is that in the worst case however, We generated something that we need to split right again! // -- Actually that is not the worst yet, the worst is that we need to do so yet we not win on that! // -- When that happens? When the values have a very few outliers and a bunch of "regular" data on the other end of domain... // -- I advise to re-evaluate the pivot value at this point to avoid too many unnecessary steps (min-max on the bucket). /** Sort using space-partitioning - not in-place! */ inline void spsort_copy(uint32_t *t, int n) { assert(n * int64_t(sizeof(t[0])) <= INT_MAX); if(n > 0) { /* 1.) DOM min-max search. O(n) */ uint32_t min = UINT32_MAX; uint32_t max = 0; for(int i = 0; i < n; ++i) { if(t[i] < min) min = t[i]; if(t[i] > max) max = t[i]; } /* 2.) Sort with space-partitioning */ /* Unsigned-ness make this overflow-safe */ uint32_t pivot = (min + max) >> 1; for(int i = 0; i < n; ++i) { // TODO: using classes and malloc (linked data)? } } } // TODO: [spsort_inplace] In-place variant 2! // See spsort_copy for more info... // // Step 1: // - Do min-max just like before // - We in the meantime split the input in two: based on the top bit. This is a radix-like step just on top bit. // - In Step2 we work on two arrays: one has top bits all set, other has not. We can thus use the top bit as flag. // - This happens in-place by swapping onto the left-right parts of the array. // // Step 2: // - First off we need to "realize" that the sum of number of elements of buckets are <= "n" (the elemno)! // - Then we will realize soon that it is exactly "i" (the loop variable) in the sort loop! // - This "hints" that we can do this alg in-place, or at least inplace numbers and some log(n) tree-descriptor // - The already-processed part of the array (below ith elements) should be where we place data for the sp-tree buckets! // - First there should be a single bucket - just like in the original. That is from the left of the array. // - When we have more elements than 'm' (bucket max size) value, we need to split contents of the bucket in two smaller. // - Then: // -- We can use the "flag bit" to tell where the buckets end. // -- Or can do a data structure that describes the tree. // - Data we sort will stay in-place, but there is either runtime cost OR additional memory cost for the tree descriptor. // The runtime cost happens on "inserting" into the space partitioning when we need to check the flags. // - When "splitting" we need to split that bucket by xchgs to the first-last elements from two sides. // - In beginning there is only one bucket and we check if we split that just by its size. // - I feel there needs a log(n) data structure with from-to descriptors of the tree + children descriptor. // - Maybe the "more-in-place" variant should be in its own file separately? #endif /* SPSORT_H */