thier3: added mlock/munlock for array and its temporary (you can turn this off)
This commit is contained in:
parent
ccdf991824
commit
d43b55f065
10
magyarsort.h
10
magyarsort.h
@ -334,7 +334,7 @@ namespace MagyarSort {
|
||||
static thread_local std::vector<uint32_t> arc(size * REUSE);
|
||||
|
||||
#ifndef NO_MLOCK
|
||||
mlock(arr, size * sizeof(uint32_t));
|
||||
mlock(arr, size * sizeof(uint32_t));
|
||||
#endif // !NO_MLOCK
|
||||
|
||||
// "Garbage-collection"
|
||||
@ -351,7 +351,7 @@ namespace MagyarSort {
|
||||
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
||||
static thread_local COUNTER_TYP magics[DIGITS * DIGIT_RANGE];
|
||||
#ifndef NO_MLOCK
|
||||
mlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
||||
mlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
||||
#endif // !NO_MLOCK
|
||||
// Write prefetchin'
|
||||
//__builtin_prefetch(&magicsOut[..], 1);
|
||||
@ -389,7 +389,7 @@ namespace MagyarSort {
|
||||
arc = std::move(std::vector<uint32_t>(size));
|
||||
}
|
||||
#ifndef NO_MLOCK
|
||||
mlock(&arc[0], size * sizeof(uint32_t));
|
||||
mlock(&arc[0], size * sizeof(uint32_t));
|
||||
#endif // !NO_MLOCK
|
||||
|
||||
uint32_t *from = arr;
|
||||
@ -406,9 +406,9 @@ namespace MagyarSort {
|
||||
memcpy(arr, to, size);
|
||||
}
|
||||
#ifndef NO_MLOCK
|
||||
munlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
||||
munlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP));
|
||||
munlock(&arc[0], size * sizeof(uint32_t));
|
||||
munlock(arr, size * sizeof(uint32_t));
|
||||
munlock(arr, size * sizeof(uint32_t));
|
||||
#endif // !NO_MLOCK
|
||||
}
|
||||
|
||||
|
||||
13
thiersort3.h
13
thiersort3.h
@ -3,6 +3,10 @@
|
||||
#include <stdint.h>
|
||||
/* A non-in-place tricky float-hackz based bucket sort variant. Uses threepass_xbit and removes some copies! */
|
||||
|
||||
#ifndef NO_MLOCK
|
||||
#include <sys/mman.h> // mlock & munlock
|
||||
#endif // !NO_MLOCK
|
||||
|
||||
/* Disables extra 1-bit split processing before threepass_xb */
|
||||
#define NO_EXTRA_BIT /* I measured the extra split to be slightly slower because of its overhead - despite fewer cache misses */
|
||||
|
||||
@ -66,6 +70,11 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifndef NO_MLOCK
|
||||
mlock(arr, n * sizeof(uint32_t));
|
||||
mlock(temparr, n * sizeof(uint32_t));
|
||||
#endif // !NO_MLOCK
|
||||
|
||||
/* Count */
|
||||
#pragma GCC unroll 64
|
||||
for(int i = 0; i < 256; ++i) {
|
||||
@ -179,6 +188,10 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) {
|
||||
}
|
||||
}
|
||||
#endif /* NO_EXTRA_BIT */
|
||||
#ifndef NO_MLOCK
|
||||
munlock(arr, n * sizeof(uint32_t));
|
||||
munlock(temparr, n * sizeof(uint32_t));
|
||||
#endif // !NO_MLOCK
|
||||
}
|
||||
|
||||
#endif /* THIER_SORT3_H */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user