diff --git a/qsort.h b/qsort.h
index 4f5c513..118b9ae 100644
--- a/qsort.h
+++ b/qsort.h
@@ -6,10 +6,19 @@
 
 /* Structure:
  *
- * - BASICS
- * - EXTRAS
+ * - BASICS: Basic quicksort
+ * - EXTRAS: Extra quicksort and partitioning helpers (also random pick and pivot index picks). Basic (free) min-max search
+ * - PMINMAX: Partitioning with min-max searches in generated partitions
+ * - VALPART: Partitioning by value - not by random or given pivot index!
  */
 
+/** Hand-coded abs difference - I wanted to minimize dependencies by not needing stdlib.h */
+static inline int64_t absdiff(uint32_t a, uint32_t b) {
+    /* Widen first: (a - b) done in uint32_t wraps around and is never negative */
+    int64_t n = (int64_t)a - (int64_t)b;
+    return (n > 0) ? n : -n;
+}
+
 /* BASICS */
 
 /** Swap operation */
@@ -35,7 +44,7 @@ static inline int partition(uint32_t array[], int low, int high) {
 
     /* select the rightmost element as pivot */
     uint32_t pivot = array[high];
-    
+
     /* index until smaller or eq elements lay */
     int i = (low - 1);
 
@@ -44,11 +53,10 @@ static inline int partition(uint32_t array[], int low, int high) {
     #pragma GCC unroll 4
     for (int j = low; j < high; ++j) {
         if (array[j] <= pivot) {
-
             /* if element smaller than pivot is found */
             /* swap it with the greater element pointed by i */
             ++i;
-            
+
             /* swap element at i with element at j */
             swapit(&array[i], &array[j]);
         }
@@ -56,7 +64,7 @@ static inline int partition(uint32_t array[], int low, int high) {
 
     /* swap the pivot element with the greater element at i */
     swapit(&array[i + 1], &array[high]);
-    
+
     /* return the partition point */
     return (i + 1);
 }
@@ -65,10 +73,10 @@ static inline int partition(uint32_t array[], int low, int high) {
 static inline void quicksort(uint32_t array[], int low, int high) {
     if (low < high) {
         int pi = partition(array, low, high);
-        
+
         /* recursive call on the left of pivot */
         quicksort(array, low, pi - 1);
-        
+
         /* recursive call on the right of pivot */
         quicksort(array, pi + 1, high);
     }
@@ -95,7 +103,7 @@ static inline int partition_with_pivot(uint32_t array[], int pi, int low, int hi
      * writing the whole out I can tell
      * this is still fastests basically.
      */
-    
+
     /* swap pivot with rightmost */
     swapit(&array[high], &array[pi]);
     /* delegate to previous sol. */
@@ -136,11 +144,10 @@ static inline int partition_and_minmax(uint32_t array[], int low, int high, uint
 
         /* Lomuto partitioning */
         if (array[j] <= pivot) {
-
             /* if element smaller than pivot is found */
             /* swap it with the greater element pointed by i */
             ++i;
-            
+
             /* swap element at i with element at j */
             swapit(&array[i], &array[j]);
         }
@@ -148,7 +155,7 @@ static inline int partition_and_minmax(uint32_t array[], int low, int high, uint
 
     /* swap the pivot element with the greater element at i */
     swapit(&array[i + 1], &array[high]);
-    
+
     /* return the partition point */
     return (i + 1);
 }
@@ -174,7 +181,7 @@ static inline int partition_and_minmax_with_pivot(uint32_t array[], int pi, int
      * writing the whole out I can tell
      * this is still fastests basically.
      */
-    
+
     /* swap pivot with rightmost */
     swapit(&array[high], &array[pi]);
     /* delegate to previous sol. */
@@ -210,4 +217,194 @@ static inline void quicksort_rand(uint32_t array[], int low, int high, rpivotsta
     }
 }
 
+/* PMINMAX */
+
+/**
+ * Partition the array with partition-based min-max search (4 values: 2 per partition) and find the pivot element such that
+ *
+ * - Elements smaller than or equal to pivot are on left of pivot
+ * - Elements greater than pivot are on right of pivot
+ *
+ * The rightmost element (array[high]) is taken as the pivot, so low <= high is needed.
+ *
+ * @param array The array to partition
+ * @param low From when. (inclusive)
+ * @param high Until when. (inclusive too!)
+ * @param minout_left OUT: Will be filled with the minimum key of left partition or pivot value when partition empty
+ * @param maxout_left OUT: Will be filled with the maximum key of left partition or pivot value when partition empty
+ * @param minout_right OUT: Will be filled with the minimum key of right partition or pivot value when partition empty
+ * @param maxout_right OUT: Will be filled with the maximum key of right partition or pivot value when partition empty
+ * @returns The partition point.
+ */
+static inline int partition_and_pminmax(
+        uint32_t array[],
+        int low,
+        int high,
+        uint32_t *minout_left,
+        uint32_t *maxout_left,
+        uint32_t *minout_right,
+        uint32_t *maxout_right) {
+    /* This is "Lomuto"s unidirectional partitioner - see algorithms book */
+
+    /* select the rightmost element as pivot */
+    uint32_t pivot = array[high];
+
+    /* Running extremes are kept in locals and stored once at the end. */
+    /* Left keys are <= pivot and right keys are > pivot, so the pivot is a neutral seed */
+    /* for lmin and rmax. lmax and rmin must start at the far ends of the key range - */
+    /* seeded with the pivot they would always report the pivot, never a real key. */
+    uint32_t lmin = pivot;
+    uint32_t lmax = 0;
+    uint32_t rmin = UINT32_MAX;
+    uint32_t rmax = pivot;
+
+    /* index until smaller or eq elements lay */
+    int i = (low - 1);
+
+    /* traverse each element of the array */
+    /* compare them with the pivot */
+    #pragma GCC unroll 4
+    for (int j = low; j < high; ++j) {
+        uint32_t key = array[j];
+        /* Lomuto partitioning */
+        if (key <= pivot) {
+            /* Branchless min-max */
+            lmin = key < lmin ? key : lmin;
+            lmax = key > lmax ? key : lmax;
+
+            /* if element smaller than pivot is found */
+            /* swap it with the greater element pointed by i */
+            ++i;
+
+            /* swap element at i with element at j */
+            swapit(&array[i], &array[j]);
+        } else {
+            /* Branchless min-max */
+            rmin = key < rmin ? key : rmin;
+            rmax = key > rmax ? key : rmax;
+        }
+    }
+
+    /* swap the pivot element with the greater element at i */
+    swapit(&array[i + 1], &array[high]);
+
+    /* Left partition is [low, i], right partition is [i + 2, high]. */
+    /* An empty partition reports the pivot value for both of its extremes. */
+    *minout_left = lmin;
+    *maxout_left = (i >= low) ? lmax : pivot;
+    *minout_right = ((i + 1) < high) ? rmin : pivot;
+    *maxout_right = rmax;
+
+    /* return the partition point */
+    return (i + 1);
+}
+
+/**
+ * Partition the array with partition-based min-max search (4 values: 2 per partition) and using the pivot index
+ *
+ * - Elements smaller than or equal to pivot are on left of pivot
+ * - Elements greater than pivot are on right of pivot
+ *
+ * @param array The array to partition
+ * @param pi The index of the pivot element to use. 0 or high is what OG quicksorts do.
+ * @param low From when. (inclusive)
+ * @param high Until when. (inclusive too!)
+ * @param minout_left OUT: Will be filled with the minimum key of left partition or pivot value when partition empty
+ * @param maxout_left OUT: Will be filled with the maximum key of left partition or pivot value when partition empty
+ * @param minout_right OUT: Will be filled with the minimum key of right partition or pivot value when partition empty
+ * @param maxout_right OUT: Will be filled with the maximum key of right partition or pivot value when partition empty
+ * @returns The partition point.
+ */
+static inline int partition_and_pminmax_with_pivot(
+        uint32_t array[],
+        int pi,
+        int low,
+        int high,
+        uint32_t *minout_left,
+        uint32_t *maxout_left,
+        uint32_t *minout_right,
+        uint32_t *maxout_right) {
+    /*
+     * Rem.: This looks like overhead,
+     * but after seriously considering
+     * writing the whole out I can tell
+     * this is still fastest basically.
+     */
+
+    /* swap pivot with rightmost */
+    swapit(&array[high], &array[pi]);
+    /* delegate to previous sol. - each OUT pointer goes to its own slot */
+    return partition_and_pminmax(array, low, high, minout_left, maxout_left, minout_right, maxout_right);
+}
+
+/* VALPART */
+
+/**
+ * Partition the array using pivot value - and find pivot closest to that value (and place it at the proper pivot index)
+ *
+ * - Elements smaller than or equal to pivot are on left of pivot
+ * - Elements greater than pivot are on right of pivot (or equal to it, when no key is <= pivotval)
+ * - The "pivot" element we have found is the closest to the "pivotval" given.
+ *
+ * @param array The array to partition
+ * @param low From when. (inclusive)
+ * @param high Until when. (inclusive too!) Needs low <= high.
+ * @param pivotval This value is used to partition the array - pivot will be the closest to this valued element!
+ * @returns The partition point - always inside [low, high].
+ */
+static inline int partition_with_pivotval(uint32_t array[], int low, int high, uint32_t pivotval) {
+    /* This is "Lomuto"s unidirectional partitioner - see algorithms book */
+
+    /* Start-value for the min(abs(pv - p)) search: absdiff() of two uint32_t keys */
+    /* always stays below 2^32, so the first real candidate beats this. */
+    int64_t mindiff = INT64_MAX;
+    /* Index of currently found pivot value */
+    int pivoti = low;
+
+    /* index until smaller or eq elements lay */
+    int i = (low - 1);
+
+    /* traverse each element of the array */
+    /* compare them with the pivotval */
+    /* The "<=" is needed for our trickz here too */
+    #pragma GCC unroll 4
+    for (int j = low; j <= high; ++j) {
+        /* This "<=" ensures pivoti must be only searched among "left" values! */
+        if (array[j] <= pivotval) {
+            /* if element smaller than pivot is found */
+            /* swap it with the greater element pointed by i */
+            ++i;
+
+            /* swap element at i with element at j */
+            swapit(&array[i], &array[j]);
+
+            /* After this, array[i] can never change - so we can save it as a found pivot-index */
+            /* Min-search on elements by telling which is closest to pivotval by abs difference! */
+            int64_t diff = absdiff(array[i], pivotval);
+            if (diff < mindiff) {
+                /* Keep the best distance too - else every key is compared to the start-value only */
+                mindiff = diff;
+                pivoti = i;
+            }
+        }
+    }
+
+    if (i < low) {
+        /* No key is <= pivotval: all keys are above it, so the closest one is the minimum. */
+        /* Place it first - it is a valid pivot with an empty left partition. */
+        int mini = low;
+        for (int j = low + 1; j <= high; ++j) {
+            mini = (array[j] < array[mini]) ? j : mini;
+        }
+        swapit(&array[low], &array[mini]);
+        return low;
+    }
+
+    /* swap the pivot element into its place */
+    swapit(&array[i], &array[pivoti]);
+
+    /* return the partition point: index of pivot element */
+    return i;
+}
+
 #endif /* MY_QUICKSORT_H */
diff --git a/zssort.h b/zssort.h
index 1070e16..5386ac4 100644
--- a/zssort.h
+++ b/zssort.h
@@ -1,9 +1,18 @@
 #ifndef ZS_SORT_H
 #define ZS_SORT_H
 
+/*
+ * Structure:
+ * - ZSSORT: The at most log(n) space needing quicksort
+ * - RZSSORT: Randomized pivoting variant at most log(n) space needing quicksort
+ * - RZSSORTC: Added check for fully same value const array (also randomized pivot)
+ * - MEANQS: Randomized pivoting AND pivoting based on min-max range - dual recursive
+ */
 #include <stdint.h>
 #include "qsort.h"
 
+/* ZSSORT */
+
 /** Always at most log(n) space needing quicksort variant */
 static inline void zssort(uint32_t array[], int low, int high) {
     /* (*) Loop handles original "other half recursion"! */
@@ -28,6 +37,8 @@ static inline void zssort(uint32_t array[], int low, int high) {
     }
 }
 
+/* RZSSORT */
+
 /** Always at most log(n) space needing randomized quicksort variant */
 static inline void zssort_rand(uint32_t array[], int low, int high, rpivotstate *state) {
     while (low < high) {
@@ -52,6 +63,8 @@ static inline void zssort_rand(uint32_t array[], int low, int high, rpivotstate
     }
 }
 
+/* RZSSORTC */
+
 /** Always at most log(n) space needing randomized quicksort variant - with checking for sameconst-arrays */
 static inline void zsrc(uint32_t array[], int low, int high, rpivotstate *state) {
     if (low < high) {
@@ -66,14 +79,116 @@ static inline void zsrc(uint32_t array[], int low, int high, rpivotstate *state)
     }
 }
 
+/* MEANQS */
+
+/**
+ * Rangsort entry point and outer-recursive function of meanqs.
+ *
+ * We pick a random pivot, but in the meantime try to guess mean for next level partitioning.
+ * meanqs and meanqs_inter are co-recursive (call each other)
+ *
+ * @param array The array to sort
+ * @param low From when. (inclusive)
+ * @param high Until when. (inclusive too!)
+ * @param state The random pivot picking state
+ */
+static inline void meanqs(uint32_t array[], int low, int high, rpivotstate *state);
+
+/**
+ * Inner-recursive function of meanqs - calls meanqs() as next level recursion.
+ *
+ * @param array The array to sort
+ * @param low From when. (inclusive)
+ * @param high Until when. (inclusive too!)
+ * @param pivotval We first partition to smaller-eq/bigger than pivotval, later we might pick random though
+ * @param state The random pivot picking state
+ */
+static inline void meanqs_inter(uint32_t array[], int low, int high, uint32_t pivotval, rpivotstate *state) {
+
+    /* First: partition using pivot val + recurse */
+
+    if (low < high) {
+        /* For the first level, use the pivotval they provide */
+        /* This only helps somewhat, because below loop.... */
+        /* ..But code it better if you can do consistently! */
+        int pi = partition_with_pivotval(array, low, high, pivotval);
+
+        /* If we recurse only the smaller part */
+        /* That ensures at most n/2 elements can */
+        /* be on any given level of the recursion */
+        /* tree: that is we ensure log2(N) memuse! */
+        if((pi - low) < (high - pi)) {
+            // Left smaller: recurse left of pivot
+            meanqs(array, low, pi - 1, state);
+            // (*) Update partitioning loop for remaining part
+            low = pi + 1;
+        } else {
+            // Right smaller: recurse right of pivot
+            meanqs(array, pi + 1, high, state);
+            // (*) Update partitioning loop for remaining part
+            high = pi - 1; /* high inclusive! */
+        }
+    }
+
+    /* Later: picking random pivot and partition like that */
+
+    /* (*) Rem.: The above if can be understood as different first iteration of this while loop... */
+    while (low < high) {
+        /* Random picking here - no info anymore about mean */
+        int pi = pick_pivot(state, (high + 1) - low) + low;
+        pi = partition_with_pivot(array, pi, low, high);
+
+        /* If we recurse only the smaller part */
+        /* That ensures at most n/2 elements can */
+        /* be on any given level of the recursion */
+        /* tree: that is we ensure log2(N) memuse! */
+        if((pi - low) < (high - pi)) {
+            // Left smaller: recurse left of pivot
+            meanqs(array, low, pi - 1, state);
+            // (*) Update partitioning loop for remaining part
+            low = pi + 1;
+        } else {
+            // Right smaller: recurse right of pivot
+            meanqs(array, pi + 1, high, state);
+            // (*) Update partitioning loop for remaining part
+            high = pi - 1; /* high inclusive! */
+        }
+    }
+}
+
+/**
+ * Rangsort entry point and outer-recursive function of meanqs.
+ *
+ * We pick a random pivot, but in the meantime try to guess mean for next level partitioning.
+ * meanqs and meanqs_inter are co-recursive (call each other)
+ *
+ * @param array The array to sort
+ * @param low From when. (inclusive)
+ * @param high Until when. (inclusive too!)
+ * @param state The random pivot picking state
+ */
+static inline void meanqs(uint32_t array[], int low, int high, rpivotstate *state) {
+    if (low < high) {
+        uint32_t lmin, lmax, rmin, rmax;
+        int pi = pick_pivot(state, (high + 1) - low) + low;
+        pi = partition_and_pminmax_with_pivot(array, pi, low, high, &lmin, &lmax, &rmin, &rmax);
+
+        /* These also handle constant (always same valued) subarrays */
+        if(lmin != lmax) {
+            /* Better than (a+b)/2 avg because of overflow possibilities */
+            uint32_t lavg = lmin + (lmax - lmin) / 2;
+            /* Recursion with inter "ensures" logn stack space need! */
+            meanqs_inter(array, low, pi - 1, lavg, state);
+        }
+        if(rmin != rmax) {
+            /* Better than (a+b)/2 avg because of overflow possibilities */
+            uint32_t ravg = rmin + (rmax - rmin) / 2;
+            /* Recursion with inter "ensures" logn stack space need! */
+            meanqs_inter(array, pi + 1, high, ravg, state);
+        }
+    }
+}
+
 // TODO: Idea: Quadratic time happens when you repeatedly pick pivots close to the maximum or minimum - so why not re-pick near extremals being picked which now can be cheked? I think we should have a treshold - maybe dynamically set by first minmax - and logarithmically divide the treshold by 2 in case its still too close the next randomization time. This handles smallrange badly though.
 
-// TODO: Guess the pivot value by previous min-maxes and let us use it inn partitioning (which would lead to "closest to pivot value" being swapped to its right position to keep the left:pivot:right separation architecture. Maybe can be faster than rand, although our randoms are pretty darn fast as you can see.
-
-// TODO: Hackzolt éjféli ötlet:
-// - Particionálás min-max-avgleft-el
-// - két irányba rekurzió, bal oldalinak avgleft átadása, másik sima zssort_rand
-// - avgleft-es rekurzióból visszahívás ennek a jelenleginek a fő függvényére (ide-oda típusú rekurzió!)
-// - esetleg ha megéri, akkor avgright kiszámítása is jó lehet: drágább műveletek, de jobb algoritmus... hmmm
-
 #endif /* ZS_SORT_H */