duplication counting, word-based radix made possible (commented out), modulus input, vectorize makefile example
This commit is contained in:
parent
c02aa49f73
commit
7e8aa96a39
23
magyarsort.h
23
magyarsort.h
@ -44,6 +44,12 @@ namespace MagyarSort {
|
|||||||
static constexpr int BITS_PER_DIGIT = 4; // "bit / helyiérték"
|
static constexpr int BITS_PER_DIGIT = 4; // "bit / helyiérték"
|
||||||
static constexpr int DIGIT_RANGE = 16; // "helyiérték állapottér"
|
static constexpr int DIGIT_RANGE = 16; // "helyiérték állapottér"
|
||||||
#else
|
#else
|
||||||
|
/*
|
||||||
|
// Per-word digits sorting
|
||||||
|
static constexpr int DIGITS = 2; // "helyiérték"
|
||||||
|
static constexpr int BITS_PER_DIGIT = 16; // "bit / helyiérték"
|
||||||
|
static constexpr int DIGIT_RANGE = 65536; // "helyiérték állapottér"
|
||||||
|
*/
|
||||||
// Per-byte digits sorting
|
// Per-byte digits sorting
|
||||||
static constexpr int DIGITS = 4; // "helyiérték"
|
static constexpr int DIGITS = 4; // "helyiérték"
|
||||||
static constexpr int BITS_PER_DIGIT = 8; // "bit / helyiérték"
|
static constexpr int BITS_PER_DIGIT = 8; // "bit / helyiérték"
|
||||||
@ -240,7 +246,22 @@ namespace MagyarSort {
|
|||||||
memset(prev, 0, sizeof(prev));
|
memset(prev, 0, sizeof(prev));
|
||||||
|
|
||||||
// This is a template-unrolled loop too
|
// This is a template-unrolled loop too
|
||||||
|
if constexpr (DIGIT_RANGE < 1024) {
|
||||||
|
// Extra optimization for bytes and nibbles - totally unrolled loop!
|
||||||
PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
|
PMagic2<DIGIT_RANGE - 1, COUNTER_TYP>(radics, prev);
|
||||||
|
} else {
|
||||||
|
// The above would not work for words and higher up...
|
||||||
|
#pragma GCC unroll 16
|
||||||
|
for(int j = 0; j < DIGITS; ++j) {
|
||||||
|
int offset = 0;
|
||||||
|
#pragma GCC unroll 64
|
||||||
|
for(int i = 0; i < DIGIT_RANGE; ++i) {
|
||||||
|
int DSTART = (j * DIGIT_RANGE);
|
||||||
|
radics[DSTART + i] += prev[j];
|
||||||
|
prev[j] = radics[DSTART + i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Recursive Functor: no class should be generated I think (compiler should be smart) */
|
/** Recursive Functor: no class should be generated I think (compiler should be smart) */
|
||||||
@ -334,7 +355,9 @@ namespace MagyarSort {
|
|||||||
#endif // !NO_MLOCK
|
#endif // !NO_MLOCK
|
||||||
// Write prefetchin'
|
// Write prefetchin'
|
||||||
//__builtin_prefetch(&radicsOut[..], 1);
|
//__builtin_prefetch(&radicsOut[..], 1);
|
||||||
|
if constexpr (DIGIT_RANGE <= 1024) {
|
||||||
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
|
PrefetchMagic<DIGITS * DIGIT_RANGE, (64/sizeof(COUNTER_TYP)), COUNTER_TYP, 1/*w*/> pm(radics);
|
||||||
|
}
|
||||||
memset(radics, 0, sizeof(radics));
|
memset(radics, 0, sizeof(radics));
|
||||||
|
|
||||||
// Calculate occurrences of digits
|
// Calculate occurrences of digits
|
||||||
|
|||||||
1
makefile
1
makefile
@ -12,6 +12,7 @@ release_debug_sym: test.cpp magyarsort.h
|
|||||||
|
|
||||||
release: test.cpp magyarsort.h
|
release: test.cpp magyarsort.h
|
||||||
g++ test.cpp -DNDEBUG -std=c++17 -O2 -o test.out
|
g++ test.cpp -DNDEBUG -std=c++17 -O2 -o test.out
|
||||||
|
# g++ test.cpp -DNDEBUG -std=c++17 -O2 -ftree-vectorize -fopt-info-vec-missed -o test.out
|
||||||
|
|
||||||
release_ypsu: ypsu.cpp magyarsort.h
|
release_ypsu: ypsu.cpp magyarsort.h
|
||||||
g++ ypsu.cpp -DNDEBUG -std=c++17 -O2 -o ypsu.out
|
g++ ypsu.cpp -DNDEBUG -std=c++17 -O2 -o ypsu.out
|
||||||
|
|||||||
32
test.cpp
32
test.cpp
@ -5,15 +5,21 @@
|
|||||||
// Uncomment next line to follow Creel: https://www.youtube.com/watch?v=ujb2CIWE8zY
|
// Uncomment next line to follow Creel: https://www.youtube.com/watch?v=ujb2CIWE8zY
|
||||||
// #define CREEL // Overwrites TEST_LEN to 16 and sets MAGYAR_SORT_NIBBLE!
|
// #define CREEL // Overwrites TEST_LEN to 16 and sets MAGYAR_SORT_NIBBLE!
|
||||||
|
|
||||||
|
// Uncomment and set a value to have every generated input element reduced modulo that value!
|
||||||
|
//#define INPUT_MOD (65536*128)
|
||||||
|
|
||||||
// Number of input elements to generate - unused when CREEL is defined!
|
// Number of input elements to generate - unused when CREEL is defined!
|
||||||
#define SORT_WIDTH 200000000
|
//#define SORT_WIDTH 200000000
|
||||||
//#define SORT_WIDTH 40000000
|
#define SORT_WIDTH 40000000
|
||||||
// Uncomment this to use nibbles as digits and not bytes - CREEL defines this anyways
|
// Uncomment this to use nibbles as digits and not bytes - CREEL defines this anyways
|
||||||
//#define MAGYAR_SORT_NIBBLE
|
//#define MAGYAR_SORT_NIBBLE
|
||||||
|
|
||||||
// Uncomment if you want to see output before / after sorts (debugging for example)
|
// Uncomment if you want to see output before / after sorts (debugging for example)
|
||||||
//#define PRINT_OUTPUT
|
//#define PRINT_OUTPUT
|
||||||
|
|
||||||
|
// Uncomment if you want to see how many elements in the input are unique and how many are duplicates (debugging info)
|
||||||
|
#define COUNT_DUPLICANTS
|
||||||
|
|
||||||
//#define SKA_SORT
|
//#define SKA_SORT
|
||||||
|
|
||||||
// Uncomment for perf / cachegrind and similar runs!
|
// Uncomment for perf / cachegrind and similar runs!
|
||||||
@ -86,7 +92,11 @@ static inline std::vector<uint32_t> GenerateInput() {
|
|||||||
ret.resize(SORT_WIDTH);
|
ret.resize(SORT_WIDTH);
|
||||||
|
|
||||||
for(size_t ek = 0; ek < SORT_WIDTH; ++ek) {
|
for(size_t ek = 0; ek < SORT_WIDTH; ++ek) {
|
||||||
|
#ifndef INPUT_MOD
|
||||||
ret[ek] = (uint32_t)std::rand();
|
ret[ek] = (uint32_t)std::rand();
|
||||||
|
#else
|
||||||
|
ret[ek] = (uint32_t)std::rand() % INPUT_MOD;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -155,9 +165,27 @@ int main() {
|
|||||||
|
|
||||||
#ifndef MEASURE_ONLY
|
#ifndef MEASURE_ONLY
|
||||||
bool good = true;
|
bool good = true;
|
||||||
|
#ifdef COUNT_DUPLICANTS
|
||||||
|
size_t dups = 0;
|
||||||
|
uint32_t prev = (in1.size() > 0) ? in1[0] : 0;
|
||||||
|
#endif // COUNT_DUPLICANTS
|
||||||
for(size_t i = 0; good && (i < in1.size()); ++i) {
|
for(size_t i = 0; good && (i < in1.size()); ++i) {
|
||||||
good &= (in1[i] == in2[i]);
|
good &= (in1[i] == in2[i]);
|
||||||
|
#ifdef COUNT_DUPLICANTS
|
||||||
|
if(i > 0) {
|
||||||
|
uint32_t curr = in1[i];
|
||||||
|
if(curr == prev) {
|
||||||
|
++dups;
|
||||||
|
} else {
|
||||||
|
prev = curr;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
#endif // COUNT_DUPLICANTS
|
||||||
|
}
|
||||||
|
#ifdef COUNT_DUPLICANTS
|
||||||
|
printf("Duplications are %d out of %d, which is %f percent\n", dups, in1.size(), (float)(dups * 100) / in1.size());
|
||||||
|
#endif // COUNT_DUPLICANTS
|
||||||
|
|
||||||
#endif // !MEASURE_ONLY
|
#endif // !MEASURE_ONLY
|
||||||
|
|
||||||
printf("Results:\n\n");
|
printf("Results:\n\n");
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user