From fad7345a8066240f1f055b162c1ea799c61b12a0 Mon Sep 17 00:00:00 2001
From: Richard Thier <magosit@outlook.hu>
Date: Tue, 16 Aug 2022 03:28:06 +0200
Subject: [PATCH] space partitioning sort first - buggy, but neargood -
 versions

---
 space_partitioning_sort/spsort.h | 246 +++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)
 create mode 100644 space_partitioning_sort/spsort.h

diff --git a/space_partitioning_sort/spsort.h b/space_partitioning_sort/spsort.h
new file mode 100644
index 0000000..91d428e
--- /dev/null
+++ b/space_partitioning_sort/spsort.h
@@ -0,0 +1,246 @@
+#ifndef SPSORT_H
+#define SPSORT_H
+/*
+ * Sorting algorithm(s) that basically space-partition the domain of the sort.
+ *
+ * These all do some (usually linear or even const) operation on the input that
+ * "measures" the inputs domain. That is which (kind of) number ranges appear.
+ * Then we know more about the distribution of the elements when it comes to
+ * their value and thus can create "buckets" and partition in the space of the
+ * values in ways that separate them the most.
+ *
+ * Some of these algorithms are simple, some have dynamic data structures for
+ * bucketing similar to octrees in graphics (which served as main idea).
+ *
+ * We only implement some of these, but many variants are possible. If you
+ * only find a single implementation, that is fine too - also we might move
+ * this text to a markdown readme and separate algorithms into different files.
+ */
+
+#include <stdint.h>
+#include <assert.h>
+
+/**
+ * A binary search based function
+ * to find the position
+ * where item should be inserted
+ * in a[low..high]
+ */
+int binary_search(uint32_t *a, int item, int low, int high) {
+	if (high <= low) 	{
+		return (item > a[low]) ?
+			(low + 1) : low;
+	}
+
+	int mid = (low + high) / 2;
+
+	if(item == a[mid]) return mid + 1;
+
+	if(item > a[mid]) return binary_search(a, item, mid + 1, high);
+	return binary_search(a, item, low, mid - 1);
+}
+
+/** Binary insertion sort */
+void binsertion_sort(uint32_t *a, int n) {
+	uint32_t selected;
+
+	for (int i = 1; i < n; ++i) {
+		int j = i - 1;
+		selected = a[i];
+
+		// find location where selected should be inseretd
+		int loc = binary_search(a, selected, 0, j);
+
+		// Move all elements after location to create space
+		while (j >= loc) {
+			a[j + 1] = a[j];
+			j--;
+		}
+		a[j + 1] = selected;
+	}
+}
+
+/** Overflow-safe and generally safe */
+inline uint32_t internal_mid(uint32_t low, uint32_t high) {
+	/* Unsigned-ness make this overflow-safe */
+	uint32_t mid = (low + high) >> 1;
+	assert(low <= mid);
+	assert(mid <= high);
+	return mid;
+}
+
+/** Sort using space-partitioning - does recursive re-pivoting */
+inline void spsort(uint32_t *t, int n, int m = 32);
+
+
+// TODO: [spsort] In-place variant that looks like a quicksort-kind-of-thing:
+//       - I realized that in-placing make this similar to quicksort actually
+//       - We check if the thing is small enough for insertion sorting in-place
+//       - If not, we calculate low-mid-high
+//       - Then we separate the array in low and high (relative to mid) by going two pointers left-and-right additions.
+//       - When loop ends, we have two smallers arrays to process.
+//       - Issue is that if the distribution of input is not equal distribution (or close), then
+//         we can end up many on left and none on right for example!
+//       - To solve that issue, we check if a side is below size of "m" bucket count.
+//       - If below, we run insertion sort on that.
+//       - If not below, we recurse totally - including min-max search domain pivoting!!! [re-pivoting]
+//       - If none of the new buckets too small, both go just partial recursion: pivot calculation not happen and
+//         we provide a pivot (mid) being right between our low-mid or mid-high pairs (depending side).
+
+/** Helper function for spsort - does not do re-pivoting and knows the domain */
+inline void internal_spsort(uint32_t *t, int n, int m, uint32_t low, uint32_t mid, uint32_t high) {
+	// No reason why we need to sort 1-big buckets...
+	// Also rules out n == 0 and n == 1 bad cases later if we do API this way.
+	assert(n > 1);
+
+	// Makes small arrays sort fast otherwise we need to go down m == 2 or such!
+	if(n < m) {
+		binsertion_sort(t, n);
+	}
+
+	// Init left-right "write head" indices
+	int left = 0;
+	int right = n - 1;
+
+	uint32_t current = t[left];
+
+	// Separate to the ends of the array (in-place)
+	// We avoid the first location to hold invariant!
+	for(int i = 1; i < n; ++i) {
+		// TODO: maybe std::swaps below can be faster when generalizing?
+		if(current < mid) {
+			const auto tmp = t[left];
+			t[left++] = current;
+			current = tmp;
+
+		} else {
+			const auto tmp = t[right];
+			t[right--] = current;
+			current = tmp;
+		}
+	}
+
+	auto lefts = left;
+	auto rights = right - left;
+
+	// Re-pivot the mid if needed
+	if(lefts < m) {
+		binsertion_sort(t, left);
+		if(rights < m) {
+			// Balance in both small-extremal
+			// means we can small-sort both.
+			binsertion_sort(t + left, rights);
+		} else {
+			// RE-PIVOT!!!
+			// This tries to help for cases where the
+			// domain of da values have a few outliers
+			// which would cause many unnecessity split.
+			//
+			// This way the midpoint and all totally get
+			// re-evaluated too, which adds an O(n) here
+			// but make us have fewer extra steps hopefully.
+			spsort(t + left, rights, m);
+		}
+	} else {
+		// lefts are enough in count
+		if(rights < m) {
+			// if rights are too few, we insertion sort them
+			binsertion_sort(t + left, rights);
+
+			// RE-PIVOT!!!
+			// and we also need repivot similar to above!
+			spsort(t, lefts, m);
+		} else {
+			// Both are non-extremal cases, which means balance too
+			// but this case we can calculate the mid by guesswork!
+			internal_spsort(t, lefts, m, low, internal_mid(low, mid), mid);
+			internal_spsort(t + left, rights, m, mid, internal_mid(mid, high), high);
+		}
+	}
+}
+
+/** Sort using space-partitioning - does recursive re-pivoting */
+inline void spsort(uint32_t *t, int n, int m) {
+	assert(n * int64_t(sizeof(t[0])) <= INT_MAX);
+	if(n > 0) {
+		/* 1.) DOM min-max search. O(n) */
+
+		uint32_t low = UINT32_MAX;
+		uint32_t high = 0;
+		for(int i = 0; i < n; ++i) {
+			if(t[i] < low) low = t[i];
+			if(t[i] > high) high = t[i];
+		}
+
+		uint32_t mid = internal_mid(low, high); // pivot
+
+		/* 2.) Sort with space-partitioning */
+
+		internal_spsort(t, n, m, low, mid, high);
+	}
+}
+
+// TODO: [spsort_copy] The algorithm for the non-in-place (copy) variant:
+//       - Basically we take the data and measure the domain (min-max) in O(n)
+//       - We then go over the input again (n times the following):
+//       -- Put the value in the bucket where it belongs (octree-insert-1D operation)
+//       -- If bucket grows too big (more than 'm' elements), then split the bucket contents.
+//       -- Split can be binary OR any kind of n-ary split in the algorithm of ours.
+//       -- An O(m) operation splits into the sub-buckets
+//       -- So far so good, because insertion point finding is O(logn) and this is O(m) which latter is const.
+//       -- The problem is that in the worst case however, We generated something that we need to split right again!
+//       -- Actually that is not the worst yet, the worst is that we need to do so yet we not win on that!
+//       -- When that happens? When the values have a very few outliers and a bunch of "regular" data on the other end of domain...
+//       -- I advise to re-evaluate the pivot value at this point to avoid too many unnecessary steps (min-max on the bucket).
+
+/** Sort using space-partitioning - not in-place! */
+inline void spsort_copy(uint32_t *t, int n) {
+	assert(n * int64_t(sizeof(t[0])) <= INT_MAX);
+	if(n > 0) {
+		/* 1.) DOM min-max search. O(n) */
+
+		uint32_t min = UINT32_MAX;
+		uint32_t max = 0;
+		for(int i = 0; i < n; ++i) {
+			if(t[i] < min) min = t[i];
+			if(t[i] > max) max = t[i];
+		}
+
+		/* 2.) Sort with space-partitioning */
+
+		/* Unsigned-ness make this overflow-safe */
+		uint32_t pivot = (min + max) >> 1;
+
+		for(int i = 0; i < n; ++i) {
+			// TODO: using classes and malloc (linked data)?
+		}
+	}
+}
+
+// TODO: [spsort_inplace] In-place variant 2!
+//       See spsort_copy for more info...
+//
+//       Step 1:
+//       - Do min-max just like before
+//       - We in the meantime split the input in two: based on the top bit. This is a radix-like step just on top bit.
+//       - In Step2 we work on two arrays: one has top bits all set, other has not. We can thus use the top bit as flag.
+//       - This happens in-place by swapping onto the left-right parts of the array.
+//
+//       Step 2:
+//       - First off we need to "realize" that the sum of number of elements of buckets are <= "n" (the elemno)!
+//       - Then we will realize soon that it is exactly "i" (the loop variable) in the sort loop!
+//       - This "hints" that we can do this alg in-place, or at least inplace numbers and some log(n) tree-descriptor
+//       - The already-processed part of the array (below ith elements) should be where we place data for the sp-tree buckets!
+//       - First there should be a single bucket - just like in the original. That is from the left of the array.
+//       - When we have more elements than 'm' (bucket max size) value, we need to split contents of the bucket in two smaller.
+//       - Then:
+//       -- We can use the "flag bit" to tell where the buckets end.
+//       -- Or can do a data structure that describes the tree.
+//       - Data we sort will stay in-place, but there is either runtime cost OR additional memory cost for the tree descriptor.
+//         The runtime cost happens on "inserting" into the space partitioning when we need to check the flags.
+//       - When "splitting" we need to split that bucket by xchgs to the first-last elements from two sides.
+//       - In beginning there is only one bucket and we check if we split that just by its size.
+//       - I feel there needs a log(n) data structure with from-to descriptors of the tree + children descriptor.
+//       - Maybe the "more-in-place" variant should be in its own file separately?
+
+#endif /* SPSORT_H */