duplication counting, word-based radix made possible (commented out), modulus input, vectorize makefile example

This commit is contained in:
Richard Thier 2022-09-01 01:56:15 +02:00
parent c02aa49f73
commit 7e8aa96a39
3 changed files with 56 additions and 4 deletions

View File

@ -44,6 +44,12 @@ namespace MagyarSort {
static constexpr int BITS_PER_DIGIT = 4; // bits per digit (a nibble)
static constexpr int DIGIT_RANGE = 16; // number of distinct values one digit can take (2^4)
#else
/*
// Per-word digits sorting (disabled alternative: 2 digits x 16 bits = 32 bits)
static constexpr int DIGITS = 2; // number of digits
static constexpr int BITS_PER_DIGIT = 16; // bits per digit
static constexpr int DIGIT_RANGE = 65536; // number of distinct values one digit can take (2^16)
*/
// Per-byte digits sorting (4 digits x 8 bits = 32 bits)
static constexpr int DIGITS = 4; // number of digits
static constexpr int BITS_PER_DIGIT = 8; // bits per digit
@ -240,7 +246,22 @@ namespace MagyarSort {
memset(prev, 0, sizeof(prev));
// This is a template-unrolled loop too
PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
if constexpr (DIGIT_RANGE < 1024) {
    // Extra optimization for bytes and nibbles - totally unrolled loop!
    PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
} else {
    // The above would not work for words and higher up, so use plain loops here.
    // For every digit j, turn its block of DIGIT_RANGE counters into an
    // inclusive running sum. prev[j] carries the running total through the
    // block and holds the sum of the whole block once the inner loop ends.
    #pragma GCC unroll 16
    for(int j = 0; j < DIGITS; ++j) {
        // First counter of digit j inside the flat radics array (loop invariant).
        const int digitStart = j * DIGIT_RANGE;
        #pragma GCC unroll 64
        for(int i = 0; i < DIGIT_RANGE; ++i) {
            radics[digitStart + i] += prev[j];
            prev[j] = radics[digitStart + i];
        }
    }
}
}
/** Recursive Functor: no class should be generated I think (compiler should be smart) */
@ -334,7 +355,9 @@ namespace MagyarSort {
#endif // !NO_MLOCK
// Write prefetchin'
//__builtin_prefetch(&radicsOut[..], 1);
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
// Issue the write-prefetch over the counter table only when the digit range is small.
// NOTE(review): presumably skipped for word-sized digits because the prefetch is
// generated per cache line over all DIGITS * DIGIT_RANGE counters - confirm.
// NOTE(review): this guard uses `<= 1024` while the prefix-sum dispatch in this
// file uses `< 1024`; identical for ranges 16, 256 and 65536, but worth unifying.
if constexpr (DIGIT_RANGE <= 1024) {
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
}
memset(radics, 0, sizeof(radics));
// Calculate occurrences of digits

View File

@ -12,6 +12,7 @@ release_debug_sym: test.cpp magyarsort.h
release: test.cpp magyarsort.h
g++ test.cpp -DNDEBUG -std=c++17 -O2 -o test.out
# g++ test.cpp -DNDEBUG -std=c++17 -O2 -ftree-vectorize -fopt-info-vec-missed -o test.out
release_ypsu: ypsu.cpp magyarsort.h
g++ ypsu.cpp -DNDEBUG -std=c++17 -O2 -o ypsu.out

View File

@ -5,15 +5,21 @@
// Uncomment next line to follow Creel: https://www.youtube.com/watch?v=ujb2CIWE8zY
// #define CREEL // Overwrites TEST_LEN to 16 and sets MAGYAR_SORT_NIBBLE!
// Uncomment and set a value to have every generated input element taken modulo that value!
//#define INPUT_MOD (65536*128)
// Number of input elements to generate - unused when CREEL is defined!
#define SORT_WIDTH 200000000
//#define SORT_WIDTH 40000000
//#define SORT_WIDTH 200000000
#define SORT_WIDTH 40000000
// Uncomment this to use nibbles as digits and not bytes - CREEL defines this anyways
//#define MAGYAR_SORT_NIBBLE
// Uncomment if you want to see output before / after sorts (debugging for example)
//#define PRINT_OUTPUT
// Uncomment if you want to see how many elements are unique and how many are duplicates in the input (debugging info)
#define COUNT_DUPLICANTS
//#define SKA_SORT
// Uncomment for perf / cachegrind and similar runs!
@ -86,7 +92,11 @@ static inline std::vector<uint32_t> GenerateInput() {
ret.resize(SORT_WIDTH);
for(size_t ek = 0; ek < SORT_WIDTH; ++ek) {
#ifndef INPUT_MOD
ret[ek] = (uint32_t)std::rand();
#else
ret[ek] = (uint32_t)std::rand() % INPUT_MOD;
#endif
}
return ret;
@ -155,9 +165,27 @@ int main() {
#ifndef MEASURE_ONLY
// Check that in1 and in2 agree element by element. `good` is part of the
// loop condition, so the scan (and the duplicate counting) stops at the
// first mismatch.
bool good = true;
#ifdef COUNT_DUPLICANTS
// dups counts the elements of in1 that equal their immediate predecessor.
// This is the number of duplicated values only if equal elements are
// adjacent - presumably in1 is already sorted at this point; confirm.
size_t dups = 0;
uint32_t prev = (in1.size() > 0) ? in1[0] : 0;
#endif // COUNT_DUPLICANTS
for(size_t i = 0; good && (i < in1.size()); ++i) {
    good &= (in1[i] == in2[i]);
#ifdef COUNT_DUPLICANTS
    if(i > 0) {
        const uint32_t curr = in1[i];
        if(curr == prev) {
            ++dups;
        } else {
            prev = curr;
        }
    }
#endif // COUNT_DUPLICANTS
}
#ifdef COUNT_DUPLICANTS
// size_t arguments need the %zu conversion; passing them to %d is undefined
// behaviour. The percentage is computed in double and reported as 0 for an
// empty in1 instead of dividing by zero.
printf("Duplications are %zu out of %zu, which is %f percent\n",
       dups,
       in1.size(),
       (in1.size() > 0)
           ? (100.0 * static_cast<double>(dups) / static_cast<double>(in1.size()))
           : 0.0);
#endif // COUNT_DUPLICANTS
#endif // !MEASURE_ONLY
printf("Results:\n\n");