diff --git a/magyarsort.h b/magyarsort.h index b3a085b..4aa23e8 100644 --- a/magyarsort.h +++ b/magyarsort.h @@ -334,7 +334,7 @@ namespace MagyarSort { static thread_local std::vector arc(size * REUSE); #ifndef NO_MLOCK - mlock(arr, size * sizeof(uint32_t)); + mlock(arr, size * sizeof(uint32_t)); #endif // !NO_MLOCK // "Garbage-collection" @@ -351,7 +351,7 @@ namespace MagyarSort { // First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB static thread_local COUNTER_TYP magics[DIGITS * DIGIT_RANGE]; #ifndef NO_MLOCK - mlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP)); + mlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP)); #endif // !NO_MLOCK // Write prefetchin' //__builtin_prefetch(&magicsOut[..], 1); @@ -389,7 +389,7 @@ namespace MagyarSort { arc = std::move(std::vector(size)); } #ifndef NO_MLOCK - mlock(&arc[0], size * sizeof(uint32_t)); + mlock(&arc[0], size * sizeof(uint32_t)); #endif // !NO_MLOCK uint32_t *from = arr; @@ -406,9 +406,9 @@ namespace MagyarSort { memcpy(arr, to, size); } #ifndef NO_MLOCK - munlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP)); + munlock(magics, (DIGITS * DIGIT_RANGE) * sizeof(COUNTER_TYP)); munlock(&arc[0], size * sizeof(uint32_t)); - munlock(arr, size * sizeof(uint32_t)); + munlock(arr, size * sizeof(uint32_t)); #endif // !NO_MLOCK } diff --git a/thiersort3.h b/thiersort3.h index e5df720..45a7deb 100644 --- a/thiersort3.h +++ b/thiersort3.h @@ -3,6 +3,10 @@ #include /* A non-implace tricky float-hackz based bucket sort variant. Uses threepass_xbit and removes some compies! 
*/ +#ifndef NO_MLOCK +#include <sys/mman.h> // mlock & munlock +#endif // !NO_MLOCK + /* Disables extra 1-bit split processing before threepass_xb */ #define NO_EXTRA_BIT /* I measure extra split to be slightly slower because overhead - despite less cache misses */ @@ -66,6 +70,11 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { return; } +#ifndef NO_MLOCK + mlock(arr, n * sizeof(uint32_t)); + mlock(temparr, n * sizeof(uint32_t)); +#endif // !NO_MLOCK + /* Count */ #pragma GCC unroll 64 for(int i = 0; i < 256; ++i) { @@ -179,6 +188,10 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { } } #endif /* NO_EXTRA_BIT */ +#ifndef NO_MLOCK + munlock(arr, n * sizeof(uint32_t)); + munlock(temparr, n * sizeof(uint32_t)); +#endif // !NO_MLOCK } #endif /* THIER_SORT3_H */ diff --git a/ypsu.cpp b/ypsu.cpp index 57388ef..e3afa07 100644 --- a/ypsu.cpp +++ b/ypsu.cpp @@ -895,8 +895,8 @@ int main(int argc, char **argv) { printf("Sorting %d elements:\n\n", n); // Uncomment this for profiling and alg! - measure_single(n); - return 0; + //measure_single(n); + //return 0; for (auto inputtype : inputtypes) { printf("%10s", inputtype.c_str());