thier3: added mlock/munlock for the array and its temporary buffer (define NO_MLOCK to turn this off)
This commit is contained in:
parent
ccdf991824
commit
d43b55f065
10
magyarsort.h
10
magyarsort.h
@ -334,7 +334,7 @@ namespace MagyarSort {
|
|||||||
static thread_local std::vector<uint32_t> arc(size * REUSE);
|
static thread_local std::vector<uint32_t> arc(size * REUSE);
|
||||||
|
|
||||||
#ifndef NO_MLOCK
|
#ifndef NO_MLOCK
|
||||||
mlock(arr, size * sizeof(uint32_t));
|
mlock(arr, size * sizeof(uint32_t));
|
||||||
#endif // !NO_MLOCK
|
#endif // !NO_MLOCK
|
||||||
|
|
||||||
// "Garbage-collection"
|
// "Garbage-collection"
|
||||||
@ -351,7 +351,7 @@ namespace MagyarSort {
|
|||||||
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
||||||
static thread_local COUNTER_TYP magics[DIGITS * DIGIT_RANGE];
|
static thread_local COUNTER_TYP magics[DIGITS * DIGIT_RANGE];
|
||||||
#ifndef NO_MLOCK
|
#ifndef NO_MLOCK
|
||||||
mlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
mlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
||||||
#endif // !NO_MLOCK
|
#endif // !NO_MLOCK
|
||||||
// Write prefetchin'
|
// Write prefetchin'
|
||||||
//__builtin_prefetch(&magicsOut[..], 1);
|
//__builtin_prefetch(&magicsOut[..], 1);
|
||||||
@ -389,7 +389,7 @@ namespace MagyarSort {
|
|||||||
arc = std::move(std::vector<uint32_t>(size));
|
arc = std::move(std::vector<uint32_t>(size));
|
||||||
}
|
}
|
||||||
#ifndef NO_MLOCK
|
#ifndef NO_MLOCK
|
||||||
mlock(&arc[0], size * sizeof(uint32_t));
|
mlock(&arc[0], size * sizeof(uint32_t));
|
||||||
#endif // !NO_MLOCK
|
#endif // !NO_MLOCK
|
||||||
|
|
||||||
uint32_t *from = arr;
|
uint32_t *from = arr;
|
||||||
@ -406,9 +406,9 @@ namespace MagyarSort {
|
|||||||
memcpy(arr, to, size);
|
memcpy(arr, to, size);
|
||||||
}
|
}
|
||||||
#ifndef NO_MLOCK
|
#ifndef NO_MLOCK
|
||||||
munlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
munlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
||||||
munlock(&arc[0], size * sizeof(uint32_t));
|
munlock(&arc[0], size * sizeof(uint32_t));
|
||||||
munlock(arr, size * sizeof(uint32_t));
|
munlock(arr, size * sizeof(uint32_t));
|
||||||
#endif // !NO_MLOCK
|
#endif // !NO_MLOCK
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
13
thiersort3.h
13
thiersort3.h
@ -3,6 +3,10 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
/* A non-in-place tricky float-hackz based bucket sort variant. Uses threepass_xbit and removes some copies! */
|
/* A non-in-place tricky float-hackz based bucket sort variant. Uses threepass_xbit and removes some copies! */
|
||||||
|
|
||||||
|
#ifndef NO_MLOCK
|
||||||
|
#include <sys/mman.h> // mlock & munlock
|
||||||
|
#endif // !NO_MLOCK
|
||||||
|
|
||||||
/* Disables extra 1-bit split processing before threepass_xb */
|
/* Disables extra 1-bit split processing before threepass_xb */
|
||||||
#define NO_EXTRA_BIT /* I measure extra split to be slightly slower because overhead - despite less cache misses */
|
#define NO_EXTRA_BIT /* I measure extra split to be slightly slower because overhead - despite less cache misses */
|
||||||
|
|
||||||
@ -66,6 +70,11 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NO_MLOCK
|
||||||
|
mlock(arr, n * sizeof(uint32_t));
|
||||||
|
mlock(temparr, n * sizeof(uint32_t));
|
||||||
|
#endif // !NO_MLOCK
|
||||||
|
|
||||||
/* Count */
|
/* Count */
|
||||||
#pragma GCC unroll 64
|
#pragma GCC unroll 64
|
||||||
for(int i = 0; i < 256; ++i) {
|
for(int i = 0; i < 256; ++i) {
|
||||||
@ -179,6 +188,10 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* NO_EXTRA_BIT */
|
#endif /* NO_EXTRA_BIT */
|
||||||
|
#ifndef NO_MLOCK
|
||||||
|
munlock(arr, n * sizeof(uint32_t));
|
||||||
|
munlock(temparr, n * sizeof(uint32_t));
|
||||||
|
#endif // !NO_MLOCK
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* THIER_SORT3_H */
|
#endif /* THIER_SORT3_H */
|
||||||
|
|||||||
4
ypsu.cpp
4
ypsu.cpp
@ -895,8 +895,8 @@ int main(int argc, char **argv) {
|
|||||||
printf("Sorting %d elements:\n\n", n);
|
printf("Sorting %d elements:\n\n", n);
|
||||||
|
|
||||||
// Uncomment this for profiling and alg!
|
// Uncomment this for profiling and alg!
|
||||||
measure_single(n);
|
//measure_single(n);
|
||||||
return 0;
|
//return 0;
|
||||||
|
|
||||||
for (auto inputtype : inputtypes) {
|
for (auto inputtype : inputtypes) {
|
||||||
printf("%10s", inputtype.c_str());
|
printf("%10s", inputtype.c_str());
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user