duplication counting, word-based radix made possible (commented out), modulus input, vectorize makefile example
This commit is contained in:
parent
c02aa49f73
commit
7e8aa96a39
27
magyarsort.h
27
magyarsort.h
@ -44,6 +44,12 @@ namespace MagyarSort {
|
||||
static constexpr int BITS_PER_DIGIT = 4; // "bit / helyiérték"
|
||||
static constexpr int DIGIT_RANGE = 16; // "helyiérték állapottér"
|
||||
#else
|
||||
/*
|
||||
// Per-word digits sorting
|
||||
static constexpr int DIGITS = 2; // "helyiérték"
|
||||
static constexpr int BITS_PER_DIGIT = 16; // "bit / helyiérték"
|
||||
static constexpr int DIGIT_RANGE = 65536; // "helyiérték állapottér"
|
||||
*/
|
||||
// Per-byte digits sorting
|
||||
static constexpr int DIGITS = 4; // "helyiérték"
|
||||
static constexpr int BITS_PER_DIGIT = 8; // "bit / helyiérték"
|
||||
@ -240,7 +246,22 @@ namespace MagyarSort {
|
||||
memset(prev, 0, sizeof(prev));
|
||||
|
||||
// This is a template-unrolled loop too
|
||||
PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
|
||||
if constexpr (DIGIT_RANGE < 1024) {
|
||||
// Extra optimization for bytes and nibbles - totally unrolled loop!
|
||||
PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
|
||||
} else {
|
||||
// The above would not work for words and higher up...
|
||||
#pragma GCC unroll 16
|
||||
for(int j = 0; j < DIGITS; ++j) {
|
||||
int offset = 0;
|
||||
#pragma GCC unroll 64
|
||||
for(int i = 0; i < DIGIT_RANGE; ++i) {
|
||||
int DSTART = (j * DIGIT_RANGE);
|
||||
radics[DSTART + i] += prev[j];
|
||||
prev[j] = radics[DSTART + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Recursive Functor: no class should be generated I think (compiler should be smart) */
|
||||
@ -334,7 +355,9 @@ namespace MagyarSort {
|
||||
#endif // !NO_MLOCK
|
||||
// Write prefetchin'
|
||||
//__builtin_prefetch(&radicsOut[..], 1);
|
||||
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
|
||||
if constexpr (DIGIT_RANGE <= 1024) {
|
||||
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
|
||||
}
|
||||
memset(radics, 0, sizeof(radics));
|
||||
|
||||
// Calculate occurrences of digits
|
||||
|
||||
1
makefile
1
makefile
@ -12,6 +12,7 @@ release_debug_sym: test.cpp magyarsort.h
|
||||
|
||||
release: test.cpp magyarsort.h
|
||||
g++ test.cpp -DNDEBUG -std=c++17 -O2 -o test.out
|
||||
# g++ test.cpp -DNDEBUG -std=c++17 -O2 -ftree-vectorize -fopt-info-vec-missed -o test.out
|
||||
|
||||
release_ypsu: ypsu.cpp magyarsort.h
|
||||
g++ ypsu.cpp -DNDEBUG -std=c++17 -O2 -o ypsu.out
|
||||
|
||||
32
test.cpp
32
test.cpp
@ -5,15 +5,21 @@
|
||||
// Uncomment next line to follow Creel: https://www.youtube.com/watch?v=ujb2CIWE8zY
|
||||
// #define CREEL // Overwrites TEST_LEN to 16 and sets MAGYAR_SORT_NIBBLE!
|
||||
|
||||
// Uncomment and set a value to have every generated input element reduced modulo that value!
|
||||
//#define INPUT_MOD (65536*128)
|
||||
|
||||
// Number of input elements to generate - unused when CREEL is defined!
|
||||
#define SORT_WIDTH 200000000
|
||||
//#define SORT_WIDTH 40000000
|
||||
//#define SORT_WIDTH 200000000
|
||||
#define SORT_WIDTH 40000000
|
||||
// Uncomment this to use nibbles as digits and not bytes - CREEL defines this anyways
|
||||
//#define MAGYAR_SORT_NIBBLE
|
||||
|
||||
// Uncomment if you want to see output before / after sorts (debugging for example)
|
||||
//#define PRINT_OUTPUT
|
||||
|
||||
// Uncomment if you want to see how many elements of the input are duplicates (debugging info)
|
||||
#define COUNT_DUPLICANTS
|
||||
|
||||
//#define SKA_SORT
|
||||
|
||||
// Uncomment for perf / cachegrind and similar runs!
|
||||
@ -86,7 +92,11 @@ static inline std::vector<uint32_t> GenerateInput() {
|
||||
ret.resize(SORT_WIDTH);
|
||||
|
||||
for(size_t ek = 0; ek < SORT_WIDTH; ++ek) {
|
||||
#ifndef INPUT_MOD
|
||||
ret[ek] = (uint32_t)std::rand();
|
||||
#else
|
||||
ret[ek] = (uint32_t)std::rand() % INPUT_MOD;
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -155,9 +165,27 @@ int main() {
|
||||
|
||||
#ifndef MEASURE_ONLY
|
||||
bool good = true;
|
||||
#ifdef COUNT_DUPLICANTS
|
||||
size_t dups = 0;
|
||||
uint32_t prev = (in1.size() > 0) ? in1[0] : 0;
|
||||
#endif // COUNT_DUPLICANTS
|
||||
for(size_t i = 0; good && (i < in1.size()); ++i) {
|
||||
good &= (in1[i] == in2[i]);
|
||||
#ifdef COUNT_DUPLICANTS
|
||||
if(i > 0) {
|
||||
uint32_t curr = in1[i];
|
||||
if(curr == prev) {
|
||||
++dups;
|
||||
} else {
|
||||
prev = curr;
|
||||
}
|
||||
}
|
||||
#endif // COUNT_DUPLICANTS
|
||||
}
|
||||
#ifdef COUNT_DUPLICANTS
|
||||
// dups and in1.size() are size_t, so they must be printed with %zu; passing them to %d is
// undefined behaviour wherever size_t is wider than int (e.g. 64-bit Linux).
printf("Duplications are %zu out of %zu, which is %f percent\n", dups, in1.size(), (float)(dups * 100) / in1.size());
|
||||
#endif // COUNT_DUPLICANTS
|
||||
|
||||
#endif // !MEASURE_ONLY
|
||||
|
||||
printf("Results:\n\n");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user