Compare commits
4 Commits
d80a061240
...
c06f02bc94
Author | SHA1 | Date | |
---|---|---|---|
|
c06f02bc94 | ||
|
6fcf79bee3 | ||
|
6d8802f479 | ||
|
5775e6c201 |
2
data.inc
2
data.inc
@ -1,5 +1,5 @@
|
|||||||
int data[] = {
|
int data[] = {
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,1,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
8, 7, 2, 1, 0, 9, 6,
|
8, 7, 2, 1, 0, 9, 6,
|
||||||
|
18
qs.c
18
qs.c
@ -4,6 +4,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "qsort.h"
|
#include "qsort.h"
|
||||||
#include "zssort.h"
|
#include "zssort.h"
|
||||||
|
#include "schwab_sort.h"
|
||||||
|
|
||||||
// function to print array elements
|
// function to print array elements
|
||||||
void printArray(int array[], int size) {
|
void printArray(int array[], int size) {
|
||||||
@ -102,6 +103,22 @@ void qs3_sp2() {
|
|||||||
printArray(data, n);
|
printArray(data, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Demo driver for schwab_sort: sorts the sample data set pulled in from
 * data.inc and prints the result with printArray.
 */
void schwab() {
    /* data.inc expands to the local array definition `int data[] = {...};` */
#include "data.inc"

    int n = sizeof(data) / sizeof(data[0]);

    /*
     * Seed explicitly. Reading an uninitialized automatic variable to get
     * "memory junk" is undefined behaviour; the schwab_rand_state doc comment
     * lists 0 as an acceptable seed when the caller does not care.
     */
    sch_rand_state rand = schwab_rand_state(0);

    /*
     * schwab_sort takes uint32_t keys while data.inc declares int, so the
     * pointer conversion has to be spelled out.
     * NOTE(review): assumes int is 32 bits wide and every value in data.inc is
     * non-negative (a negative int would order after the positives once
     * reinterpreted as unsigned) - confirm against the full data.inc.
     */
    schwab_sort((uint32_t *)data, 0, n - 1, &rand);

    printf("(schwab) Sorted array in ascending order: \n");
    printArray(data, n);
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
qs();
|
qs();
|
||||||
qsr();
|
qsr();
|
||||||
@ -109,5 +126,6 @@ int main() {
|
|||||||
qs3();
|
qs3();
|
||||||
qs3_sp();
|
qs3_sp();
|
||||||
qs3_sp2();
|
qs3_sp2();
|
||||||
|
schwab();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
179
schwab_sort.h
Normal file
179
schwab_sort.h
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
#ifndef SWAB_SORT_H
|
||||||
|
#define SWAB_SORT_H
|
||||||
|
|
||||||
|
/* A fast quicksort-like new alg created in Csolnok, Hungary with:
|
||||||
|
* - 4-way partitioning with 0..5 copies (not swaps) per elem per run
|
||||||
|
* - ensured O(log2(n)) worst recursion depth
|
||||||
|
*
|
||||||
|
* LICENCE: CC-BY, 2025 May 08
|
||||||
|
* Author: Richárd István Thier (also author of the Magyarsort)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This header uses uint32_t / uint64_t throughout; include <stdint.h> here so
 * it does not depend on what the including file happened to include first. */
#include <stdint.h>

/** State of the pivot-picking random generator: a single 32-bit word. */
typedef uint32_t sch_rand_state;
|
||||||
|
|
||||||
|
/** Create rand state for schwab_sort using a seed - can give 0 if uninterested */
|
||||||
|
static inline sch_rand_state schwab_rand_state(uint32_t seed) {
|
||||||
|
return seed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** 32-bit LCG for fast random generations - from my fastrand.h */
|
||||||
|
static inline uint32_t schwab_lcg(sch_rand_state *state) {
|
||||||
|
*state = *state * 1664525u + 1013904223u;
|
||||||
|
return *state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get pivot index in [0, len-1] without modulus - from my fastrand.h */
|
||||||
|
static inline uint32_t schwab_pick_pivot(sch_rand_state *state, uint32_t len) {
|
||||||
|
uint32_t rand = schwab_lcg(state);
|
||||||
|
/* Multiply by len, take the upper 32 bits of the 64-bit result */
|
||||||
|
return (uint32_t)(((uint64_t)rand * len) >> 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * 4-way partitioning of arr[low..high] around two pivots and a middle key.
 *
 * Expects: arr[*plo] <= kmid <= arr[*phi] on entry (klo and khi below are the
 * values found at those two indices on entry).
 *
 * On return the range is laid out as:
 *   arr[low   .. *plo  - 1]   keys <  klo
 *   arr[*plo  .. *pmid - 1]   klo  <= keys < kmid
 *   arr[*pmid .. *phi  - 2]   kmid <= keys < khi
 *   arr[*phi - 1]             the high pivot itself (value khi)
 *   arr[*phi  .. high]        keys >= khi
 *
 * Because the high pivot lands between the third and fourth group, the slot
 * *phi - 1 never needs to be looked at again by the caller.
 *
 * @param arr  The array to partition.
 * @param low  Inclusive smallest index.
 * @param high Inclusive highest index.
 * @param plo  IN-OUT: in - index of the low pivot; out - first index past the
 *             keys smaller than the low pivot.
 * @param kmid IN: middle splitting value (acts like a pivot value but does not
 *             have to be present in the array).
 * @param pmid OUT: first index past the keys smaller than kmid.
 * @param phi  IN-OUT: in - index of the high pivot; out - first index past the
 *             slot where the high pivot was placed.
 */
static inline void schwab_partition(
        uint32_t *arr,
        int low,
        int high,
        int *plo,
        uint32_t kmid,
        int *pmid,
        int *phi) {

    /* Read both pivot keys before any element is moved */
    const uint32_t key_lo = arr[*plo];
    const uint32_t key_hi = arr[*phi];

    /* Park the high pivot in the last slot; the scan below stops just before it */
    arr[*phi] = arr[high];
    arr[high] = key_hi;

    /* Exclusive end indices of the three lower groups; the scan index itself
     * is the exclusive end of the fourth (largest keys) group */
    int end0 = low;
    int end1 = low;
    int end2 = low;

    for (int scan = low; scan < high; ++scan) {
        /* TODO: should be a copy of the whole element when not just uint32s! */
        const uint32_t cur = arr[scan];

        /* Checked first: hot path for constant / small-range inputs, where
         * the element simply stays in the top group */
        if (cur >= key_hi) {
            continue;
        }

        /* Every remaining case starts by moving the first element of the top
         * group into the scanned slot, freeing one slot at end2.
         * TODO: for elements wider than uint32 these chained copies could be
         * done as interleaved key/value copies instead of memcpy calls. */
        arr[scan] = arr[end2];

        if (cur < key_lo) {
            /* Shift the first element of groups 2 and 1 up by one group each */
            arr[end2] = arr[end1];
            arr[end1] = arr[end0];
            arr[end0] = cur;
            ++end0;
            ++end1;
            ++end2;
        } else if (cur < kmid) {
            arr[end2] = arr[end1];
            arr[end1] = cur;
            ++end1;
            ++end2;
        } else {
            arr[end2] = cur;
            ++end2;
        }
    }

    /* Bring the parked high pivot back to the front of the top group; the
     * element it displaces goes to the last slot, still inside that group */
    const uint32_t displaced = arr[end2];
    arr[end2] = arr[high];
    arr[high] = displaced;

    /* Outputs as described in the doc comment; *phi points past the pivot */
    *plo = end0;
    *pmid = end1;
    *phi = end2 + 1;
}
|
||||||
|
|
||||||
|
/** Swabic-sort its somewhat similar to quicksort but 4-way and tricky */
|
||||||
|
static inline void schwab_sort(
|
||||||
|
uint32_t *array,
|
||||||
|
int low,
|
||||||
|
int high,
|
||||||
|
sch_rand_state *state) {
|
||||||
|
|
||||||
|
/* Loop handles longest sub-sort-task which ensused log tree depth */
|
||||||
|
while(low < high) {
|
||||||
|
int r0 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
||||||
|
int r1 = schwab_pick_pivot(state, (high + 1) - low) + low;
|
||||||
|
uint32_t klo = array[r0];
|
||||||
|
uint32_t khi = array[r1];
|
||||||
|
int plo = r0;
|
||||||
|
int phi = r1;
|
||||||
|
if(klo > khi) {
|
||||||
|
uint32_t ktmp = klo;
|
||||||
|
klo = khi;
|
||||||
|
khi = ktmp;
|
||||||
|
|
||||||
|
plo = r1;
|
||||||
|
phi = r0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t kmid = klo + (khi - klo) / 2;
|
||||||
|
|
||||||
|
int pmid;
|
||||||
|
schwab_partition(array, low, high, &plo, kmid, &pmid, &phi);
|
||||||
|
|
||||||
|
/* See where NOT to recurse to avoid worst case stack depth */
|
||||||
|
/* Rem.: These might be "not real" length but we only use them to comparisons */
|
||||||
|
/* REM.: The "real" lengths might be off-by-one but these are FASTER! */
|
||||||
|
int lolen = plo - low;
|
||||||
|
int hilen = high - phi;
|
||||||
|
|
||||||
|
/* Rewrite loop for worst subtask goal and recurse others! */
|
||||||
|
/* Let the branch predictor try to predict input data path */
|
||||||
|
/* Rem.: Best would be to check for biggest in all 4 block */
|
||||||
|
/* But that would complicate codes above this point! */
|
||||||
|
/* Rem.: Order of operations try to be a cache-friendly as */
|
||||||
|
/* possible, but had to put loops changes to the end */
|
||||||
|
if(lolen < hilen) {
|
||||||
|
schwab_sort(array, low, plo - 1, state);
|
||||||
|
schwab_sort(array, plo, pmid - 1, state);
|
||||||
|
schwab_sort(array, pmid, phi - 1, state);
|
||||||
|
|
||||||
|
low = phi;
|
||||||
|
/* high = high; */
|
||||||
|
} else {
|
||||||
|
schwab_sort(array, phi, high, state);
|
||||||
|
schwab_sort(array, pmid, phi - 1, state);
|
||||||
|
schwab_sort(array, plo, pmid - 1, state);
|
||||||
|
|
||||||
|
/* low = low; */
|
||||||
|
high = plo - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* SWAB_SORT_H */
|
Loading…
x
Reference in New Issue
Block a user