Small optimization to the GPT-written and my own space-partitioning bucket sort
This commit is contained in:
parent
7e21807668
commit
50b1997d5c
38
gptsort.h
38
gptsort.h
@ -2,6 +2,7 @@
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
// ChatGPT and I wrote this space-partitioning bucket sort
|
||||
void gpt_bucket_sort(uint32_t* array, int n) {
|
||||
// Calculate the number of buckets to use
|
||||
int num_buckets = std::sqrt(n);
|
||||
@ -36,3 +37,40 @@ void gpt_bucket_sort(uint32_t* array, int n) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Further optimizations (no chatGPT)
|
||||
// Bucket sort for 32-bit unsigned keys, sorting `array` in place.
//
// The value range [min, max] of the input is cut into about sqrt(n) equally
// wide buckets. Every element is scattered into the bucket covering its
// value, each bucket is sorted with std::sort, and the buckets are written
// back to `array` in ascending bucket order.
//
// array: pointer to the n elements to sort.
// n:     number of elements. For n <= 1 (or a null array) nothing is done.
void my_bucket_sort(uint32_t* array, int n) {
    // Nothing to sort. This also keeps std::minmax_element away from an
    // empty range and guarantees at least one bucket below.
    if (array == nullptr || n <= 1) {
        return;
    }

    // Calculate the number of buckets to use
    const int num_buckets = std::max(1, static_cast<int>(std::sqrt(n)));

    // Calculate the range of values that each bucket can hold
    const auto mm = std::minmax_element(array, array + n);
    const uint32_t min_value = *mm.first;
    const uint32_t max_value = *mm.second;

    // All keys are equal, so the array is already sorted.
    if (min_value == max_value) {
        return;
    }

    // Use 64-bit arithmetic here: in 32 bits `max - min + 1` wraps to 0 when
    // the input spans the full uint32_t range, and `range / num_buckets + 1`
    // can wrap to 0 as well, which would make the division below invalid.
    const uint64_t range = static_cast<uint64_t>(max_value) - min_value + 1;
    const uint64_t bucket_size = range / static_cast<uint64_t>(num_buckets) + 1;

    // Create a vector of buckets
    std::vector<std::vector<uint32_t>> buckets(num_buckets);

    // Distribute the elements of the array into the buckets.
    // bucket_size > range / num_buckets, so the index is always < num_buckets.
    for (int i = 0; i < n; ++i) {
        const auto bucket_index =
            static_cast<std::size_t>((array[i] - min_value) / bucket_size);
        buckets[bucket_index].push_back(array[i]);
    }

    // Sort each bucket and append it to the output right away; buckets are
    // visited in ascending value order, so the concatenation is sorted.
    uint32_t* out = array;
    for (auto& bucket : buckets) {
        std::sort(bucket.begin(), bucket.end());
        out = std::copy(bucket.begin(), bucket.end(), out);
    }
}
|
||||
|
||||
3
makefile
3
makefile
@ -31,5 +31,8 @@ clang_release: test.cpp magyarsort.h
|
||||
clang_release3: test.cpp magyarsort.h
|
||||
clang++ test.cpp -DNDEBUG -std=c++17 -O3 -o test.out
|
||||
|
||||
clang_release_ypsu: ypsu.cpp magyarsort.h
|
||||
clang++ ypsu.cpp -DNDEBUG -std=c++17 -O2 -o ypsu.out
|
||||
|
||||
clean: test.out
|
||||
rm test.out
|
||||
|
||||
116
ypsu.cpp
116
ypsu.cpp
@ -1,28 +1,28 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <sys/mman.h> // mlock & munlock
|
||||
#include "ska_sort.hpp"
|
||||
#include "gptsort.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <sys/mman.h> // mlock & munlock
|
||||
#include "ska_sort.hpp"
|
||||
#include "gptsort.h"
|
||||
|
||||
#define MAGYAR_SORT_DEFAULT_REUSE
|
||||
#include "magyarsort.h"
|
||||
#define MAGYAR_SORT_DEFAULT_REUSE
|
||||
#include "magyarsort.h"
|
||||
|
||||
#include "space_partitioning_sort/spsort.h"
|
||||
#include "space_partitioning_sort/spsort.h"
|
||||
|
||||
std::map<std::string, double> results;
|
||||
std::map<std::string, double> worst;
|
||||
void measure(const std::string &inputtype, const std::string &name,
|
||||
std::map<std::string, double> results;
|
||||
std::map<std::string, double> worst;
|
||||
void measure(const std::string &inputtype, const std::string &name,
|
||||
std::function<void()> f) {
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
f();
|
||||
@ -30,12 +30,12 @@
|
||||
double seconds = dur / std::chrono::milliseconds(1) / 1000.0;
|
||||
results[name] = seconds;
|
||||
worst[name] = std::max(worst[name], seconds);
|
||||
}
|
||||
std::vector<std::string> inputtypes = {
|
||||
}
|
||||
std::vector<std::string> inputtypes = {
|
||||
"constant", "asc", "desc", "ascasc", "ascdesc",
|
||||
"descasc", "descdesc", "smallrange", "rand",
|
||||
};
|
||||
std::vector<uint32_t> geninput(const std::string &type, int n) {
|
||||
};
|
||||
std::vector<uint32_t> geninput(const std::string &type, int n) {
|
||||
std::vector<uint32_t> v(n);
|
||||
if (type == "constant") {
|
||||
int c = rand();
|
||||
@ -81,9 +81,9 @@
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
||||
void twopass(uint32_t *a, int n) {
|
||||
void twopass(uint32_t *a, int n) {
|
||||
assert(n * int64_t(sizeof(a[0])) <= INT_MAX);
|
||||
// alloc helper buffers.
|
||||
int sz = n * sizeof(a[0]);
|
||||
@ -110,8 +110,8 @@
|
||||
}
|
||||
for (int i = 0; i < n; i++) a[bucketdata[buf[i] >> 16]++] = buf[i];
|
||||
free(buf);
|
||||
}
|
||||
void fourpass(uint32_t *a, int n) {
|
||||
}
|
||||
void fourpass(uint32_t *a, int n) {
|
||||
assert(n * int64_t(sizeof(a[0])) <= INT_MAX);
|
||||
// alloc helper buffers.
|
||||
int sz = n * sizeof(a[0]);
|
||||
@ -136,15 +136,15 @@
|
||||
dst = (uint32_t *)((uintptr_t)dst ^ swapmask);
|
||||
}
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
/** Only werks für das fourpassu! */
|
||||
void my_memset(int *v) {
|
||||
/** Only werks für das fourpassu! */
|
||||
void my_memset(int *v) {
|
||||
memset(v, 0, (1 << 8) * sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
// hand-unrolled fourpass.
|
||||
void fourpassu(uint32_t *a, int n) {
|
||||
// hand-unrolled fourpass.
|
||||
void fourpassu(uint32_t *a, int n) {
|
||||
assert(n * int64_t(sizeof(a[0])) <= INT_MAX);
|
||||
// alloc helper buffers.
|
||||
int sz = n * sizeof(a[0]);
|
||||
@ -205,10 +205,10 @@
|
||||
#pragma GCC unroll 32
|
||||
for (int i = 0; i < n; i++) a[bucketdata[buf[i] >> 24 & 0xff]++] = buf[i];
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
// Rotates a 32-bit word right by 8 bits: the lowest byte moves to the top
// and the three upper bytes shift down by one byte position.
static inline uint32_t byterotate(uint32_t x) {
  const uint32_t upper_bytes_down = x >> 8;
  const uint32_t low_byte_to_top = x << 24;
  return upper_bytes_down | low_byte_to_top;
}
|
||||
void fourrots(uint32_t *arr, int n) {
|
||||
// Rotates a 32-bit word right by 8 bits: the lowest byte moves to the top
// and the three upper bytes shift down by one byte position.
static inline uint32_t byterotate(uint32_t x) {
  const uint32_t upper_bytes_down = x >> 8;
  const uint32_t low_byte_to_top = x << 24;
  return upper_bytes_down | low_byte_to_top;
}
|
||||
void fourrots(uint32_t *arr, int n) {
|
||||
assert(n * int64_t(sizeof(arr[0])) <= INT_MAX);
|
||||
assert(n % 4 == 0);
|
||||
// alloc helper buffers.
|
||||
@ -261,10 +261,10 @@
|
||||
dst = (uint32_t *)((uintptr_t)dst ^ swapmask);
|
||||
}
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
// frewr - four rewrites.
|
||||
void frewr(uint32_t *arr, int n) {
|
||||
// frewr - four rewrites.
|
||||
void frewr(uint32_t *arr, int n) {
|
||||
uint32_t *tmpbuf = (uint32_t *)malloc(n * 4);
|
||||
mlock(tmpbuf, n * 4);
|
||||
int btoffsets[4][256] = {};
|
||||
@ -302,9 +302,9 @@
|
||||
}
|
||||
munlock(tmpbuf, n * 4);
|
||||
free(tmpbuf);
|
||||
}
|
||||
}
|
||||
|
||||
void vsort(uint32_t *a, int n) {
|
||||
void vsort(uint32_t *a, int n) {
|
||||
thread_local std::vector<uint32_t> bts[256];
|
||||
#pragma GCC unroll 4
|
||||
for (int shift = 0; shift < 32; shift += 8) {
|
||||
@ -317,8 +317,8 @@
|
||||
bts[bt].clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
void pagedsort(uint32_t *a, int n) {
|
||||
}
|
||||
void pagedsort(uint32_t *a, int n) {
|
||||
enum { pagesize = 1024 };
|
||||
int pagecount = (n + pagesize - 1) / pagesize + 512;
|
||||
uint32_t *pd = (uint32_t *)malloc(pagecount * pagesize * sizeof(a[0]));
|
||||
@ -390,16 +390,17 @@
|
||||
}
|
||||
}
|
||||
free(pd);
|
||||
}
|
||||
}
|
||||
|
||||
// to measure / profile a single variant
|
||||
void measure_single(int n) {
|
||||
// to measure / profile a single variant
|
||||
void measure_single(int n) {
|
||||
for (auto inputtype : inputtypes) {
|
||||
printf("%10s", inputtype.c_str());
|
||||
fflush(stdout);
|
||||
std::vector<uint32_t> v(n);
|
||||
v = geninput(inputtype, n);
|
||||
measure(inputtype, "sp", [&] { spsort(&v[0], v.size()); });
|
||||
//measure(inputtype, "sp", [&] { spsort(&v[0], v.size()); });
|
||||
measure(inputtype, "magyar", [&] { MagyarSort::sort<uint32_t>(&v[0], v.size()); });
|
||||
|
||||
for (auto r : results) printf("%9.3fs", r.second);
|
||||
puts("");
|
||||
@ -411,11 +412,11 @@
|
||||
printf("%10s", "");
|
||||
for (auto w : worst) printf("%10s", w.first.c_str());
|
||||
puts("");
|
||||
}
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
int n = 100000000;
|
||||
//int n = 10000000;
|
||||
int main(void) {
|
||||
//int n = 100000000;
|
||||
int n = 10000000;
|
||||
//int n = 100;
|
||||
|
||||
// Uncomment this for profiling and alg!
|
||||
@ -457,16 +458,17 @@
|
||||
w = v;
|
||||
measure(inputtype, "4pasu", [&] { fourpassu(&w[0], w.size()); });
|
||||
assert(w == expected);
|
||||
*/
|
||||
w = v;
|
||||
measure(inputtype, "4rot", [&] { fourrots(&w[0], w.size()); });
|
||||
assert(w == expected);
|
||||
w = v;
|
||||
/*measure(inputtype, "sp", [&] { spsort(&w[0], w.size()); });
|
||||
measure(inputtype, "sp", [&] { spsort(&w[0], w.size()); });
|
||||
assert(w == expected);
|
||||
w = v;*/
|
||||
measure(inputtype, "gptbuck", [&] { gpt_bucket_sort(&w[0], w.size()); });
|
||||
assert(w == expected);
|
||||
measure(inputtype, "mybuck", [&] { my_bucket_sort(&w[0], w.size()); });
|
||||
assert(w == expected);
|
||||
/*
|
||||
w = v;
|
||||
measure(inputtype, "frewr", [&] { frewr(&w[0], w.size()); });
|
||||
@ -487,4 +489,4 @@
|
||||
for (auto w : worst) printf("%10s", w.first.c_str());
|
||||
puts("");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user