From fad7345a8066240f1f055b162c1ea799c61b12a0 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Tue, 16 Aug 2022 03:28:06 +0200 Subject: [PATCH] space partitioning sort first - buggy, but neargood - versions --- space_partitioning_sort/spsort.h | 246 +++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 space_partitioning_sort/spsort.h diff --git a/space_partitioning_sort/spsort.h b/space_partitioning_sort/spsort.h new file mode 100644 index 0000000..91d428e --- /dev/null +++ b/space_partitioning_sort/spsort.h @@ -0,0 +1,246 @@ +#ifndef SPSORT_H +#define SPSORT_H +/* + * Sorting algorithm(s) that basically space-partition the domain of the sort. + * + * These all do some (usually linear or even const) operation on the input that + * "measures" the inputs domain. That is which (kind of) number ranges appear. + * Then we know more about the distribution of the elements when it comes to + * their value and thus can create "buckets" and partition in the space of the + * values in ways that separate them the most. + * + * Some of these algorithms are simple, some have dynamic data structures for + * bucketing similar to octrees in graphics (which served as main idea). + * + * We only implement some of these, but many variants are possible. If you + * only find a single implementation, that is fine too - also we might move + * this text to a markdown readme and separate algorithms into different files. + */ + +#include +#include + +/** + * A binary search based function + * to find the position + * where item should be inserted + * in a[low..high] + */ +int binary_search(uint32_t *a, int item, int low, int high) { + if (high <= low) { + return (item > a[low]) ? + (low + 1) : low; + } + + int mid = (low + high) / 2; + + if(item == a[mid]) return mid + 1; + + if(item > a[mid]) return binary_search(a, item, mid + 1, high); + return binary_search(a, item, low, mid - 1); +} + +/** Binary insertion sort */ +void binsertion_sort(uint32_t *a, int n) { + uint32_t selected; + + for (int i = 1; i < n; ++i) { + int j = i - 1; + selected = a[i]; + + // find location where selected should be inseretd + int loc = binary_search(a, selected, 0, j); + + // Move all elements after location to create space + while (j >= loc) { + a[j + 1] = a[j]; + j--; + } + a[j + 1] = selected; + } +} + +/** Overflow-safe and generally safe */ +inline uint32_t internal_mid(uint32_t low, uint32_t high) { + /* Unsigned-ness make this overflow-safe */ + uint32_t mid = (low + high) >> 1; + assert(low <= mid); + assert(mid <= high); + return mid; +} + +/** Sort using space-partitioning - does recursive re-pivoting */ +inline void spsort(uint32_t *t, int n, int m = 32); + + +// TODO: [spsort] In-place variant that looks like a quicksort-kind-of-thing: +// - I realized that in-placing make this similar to quicksort actually +// - We check if the thing is small enough for insertion sorting in-place +// - If not, we calculate low-mid-high +// - Then we separate the array in low and high (relative to mid) by going two pointers left-and-right additions. +// - When loop ends, we have two smallers arrays to process. +// - Issue is that if the distribution of input is not equal distribution (or close), then +// we can end up many on left and none on right for example! +// - To solve that issue, we check if a side is below size of "m" bucket count. +// - If below, we run insertion sort on that. +// - If not below, we recurse totally - including min-max search domain pivoting!!! [re-pivoting] +// - If none of the new buckets too small, both go just partial recursion: pivot calculation not happen and +// we provide a pivot (mid) being right between our low-mid or mid-high pairs (depending side). + +/** Helper function for spsort - does not do re-pivoting and knows the domain */ +inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mid, uint32_t high) { + // No reason why we need to sort 1-big buckets... + // Also rules out n == 0 and n == 1 bad cases later if we do API this way. + assert(n > 1); + + // Makes small arrays sort fast otherwise we need to go down m == 2 or such! + if(n < m) { + binsertion_sort(t, n); + } + + // Init left-right "write head" indices + int left = 0; + int right = n - 1; + + uint32_t current = t[left]; + + // Separate to the ends of the array (in-place) + // We avoid the first location to hold invariant! + for(int i = 1; i < n; ++i) { + // TODO: maybe std::swaps below can be faster when generalizing? + if(current < mid) { + const auto tmp = t[left]; + t[left++] = current; + current = tmp; + + } else { + const auto tmp = t[right]; + t[right--] = current; + current = tmp; + } + } + + auto lefts = left; + auto rights = right - left; + + // Re-pivot the mid if needed + if(lefts < m) { + binsertion_sort(t, left); + if(rights < m) { + // Balance in both small-extremal + // means we can small-sort both. + binsertion_sort(t + left, rights); + } else { + // RE-PIVOT!!! + // This tries to help for cases where the + // domain of da values have a few outliers + // which would cause many unnecessity split. + // + // This way the midpoint and all totally get + // re-evaluated too, which adds an O(n) here + // but make us have fewer extra steps hopefully. + spsort(t + left, rights, m); + } + } else { + // lefts are enough in count + if(rights < m) { + // if rights are too few, we insertion sort them + binsertion_sort(t + left, rights); + + // RE-PIVOT!!! + // and we also need repivot similar to above! + spsort(t, lefts, m); + } else { + // Both are non-extremal cases, which means balance too + // but this case we can calculate the mid by guesswork! + internal_spsort(t, lefts, m, low, internal_mid(low, mid), mid); + internal_spsort(t + left, rights, m, mid, internal_mid(mid, high), high); + } + } +} + +/** Sort using space-partitioning - does recursive re-pivoting */ +inline void spsort(uint32_t *t, int n, int m) { + assert(n * int64_t(sizeof(t[0])) <= INT_MAX); + if(n > 0) { + /* 1.) DOM min-max search. O(n) */ + + uint32_t low = UINT32_MAX; + uint32_t high = 0; + for(int i = 0; i < n; ++i) { + if(t[i] < low) low = t[i]; + if(t[i] > high) high = t[i]; + } + + uint32_t mid = internal_mid(low, high); // pivot + + /* 2.) Sort with space-partitioning */ + + internal_spsort(t, n, m, low, mid, high); + } +} + +// TODO: [spsort_copy] The algorithm for the non-in-place (copy) variant: +// - Basically we take the data and measure the domain (min-max) in O(n) +// - We then go over the input again (n times the following): +// -- Put the value in the bucket where it belongs (octree-insert-1D operation) +// -- If bucket grows too big (more than 'm' elements), then split the bucket contents. +// -- Split can be binary OR any kind of n-ary split in the algorithm of ours. +// -- An O(m) operation splits into the sub-buckets +// -- So far so good, because insertion point finding is O(logn) and this is O(m) which latter is const. +// -- The problem is that in the worst case however, We generated something that we need to split right again! +// -- Actually that is not the worst yet, the worst is that we need to do so yet we not win on that! +// -- When that happens? When the values have a very few outliers and a bunch of "regular" data on the other end of domain... +// -- I advise to re-evaluate the pivot value at this point to avoid too many unnecessary steps (min-max on the bucket). + +/** Sort using space-partitioning - not in-place! */ +inline void spsort_copy(uint32_t *t, int n) { + assert(n * int64_t(sizeof(t[0])) <= INT_MAX); + if(n > 0) { + /* 1.) DOM min-max search. O(n) */ + + uint32_t min = UINT32_MAX; + uint32_t max = 0; + for(int i = 0; i < n; ++i) { + if(t[i] < min) min = t[i]; + if(t[i] > max) max = t[i]; + } + + /* 2.) Sort with space-partitioning */ + + /* Unsigned-ness make this overflow-safe */ + uint32_t pivot = (min + max) >> 1; + + for(int i = 0; i < n; ++i) { + // TODO: using classes and malloc (linked data)? + } + } +} + +// TODO: [spsort_inplace] In-place variant 2! +// See spsort_copy for more info... +// +// Step 1: +// - Do min-max just like before +// - We in the meantime split the input in two: based on the top bit. This is a radix-like step just on top bit. +// - In Step2 we work on two arrays: one has top bits all set, other has not. We can thus use the top bit as flag. +// - This happens in-place by swapping onto the left-right parts of the array. +// +// Step 2: +// - First off we need to "realize" that the sum of number of elements of buckets are <= "n" (the elemno)! +// - Then we will realize soon that it is exactly "i" (the loop variable) in the sort loop! +// - This "hints" that we can do this alg in-place, or at least inplace numbers and some log(n) tree-descriptor +// - The already-processed part of the array (below ith elements) should be where we place data for the sp-tree buckets! +// - First there should be a single bucket - just like in the original. That is from the left of the array. +// - When we have more elements than 'm' (bucket max size) value, we need to split contents of the bucket in two smaller. +// - Then: +// -- We can use the "flag bit" to tell where the buckets end. +// -- Or can do a data structure that describes the tree. +// - Data we sort will stay in-place, but there is either runtime cost OR additional memory cost for the tree descriptor. +// The runtime cost happens on "inserting" into the space partitioning when we need to check the flags. +// - When "splitting" we need to split that bucket by xchgs to the first-last elements from two sides. +// - In beginning there is only one bucket and we check if we split that just by its size. +// - I feel there needs a log(n) data structure with from-to descriptors of the tree + children descriptor. +// - Maybe the "more-in-place" variant should be in its own file separately? + +#endif /* SPSORT_H */