diff --git a/thiersort2.h b/thiersort2.h
index b47b04c..8a7b714 100644
--- a/thiersort2.h
+++ b/thiersort2.h
@@ -4,6 +4,21 @@
 #include "qsort/schwab_sort.h"
 
 /* A non-implace tricky float-hackz based bucket sort variant. Uses schwabsort! */
+#ifdef _MSC_VER /* MSVC: no __builtin_* hints, so the hint macros degrade to no-ops */
+#define KM_PREFETCH(x) /* expands to nothing: prefetching is only a hint */
+#define LIKELY(x) (x) /* must still yield the condition, otherwise if(LIKELY(c)) becomes if() */
+#define UNLIKELY(x) (x) /* same: drop the hint, keep the expression */
+#define KM_NOINLINE __declspec(noinline)
+#define KM_ALWAYS_INLINE __forceinline
+#else /* GCC / Clang builtins and attributes */
+#define KM_PREFETCH(x) __builtin_prefetch(x)
+#define LIKELY(x) __builtin_expect((x),1)
+/* alternative: #define LIKELY(x) __builtin_expect(((x) != 0),1) */
+#define UNLIKELY(x) __builtin_expect((x),0)
+#define KM_NOINLINE __attribute__ ((noinline))
+#define KM_ALWAYS_INLINE __attribute__ ((always_inline))
+#endif
+
 /* Float and unsigned32 reinterpreter */
 union th2_fu {
 	float f;
@@ -103,7 +118,7 @@ static inline void thiersort2(uint32_t *arr, uint32_t *temparr, int n, sch_rand_
 			uint32_t pivot = temparr[i];
 			#pragma GCC unroll 4
 			for(int j = begin + 1; j < end; ++j) {
-				if(temparr[j] == pivot) {
+				if(UNLIKELY(temparr[j] == pivot)) {
 					/* swap to front partition */
 					++i;
 					uint32_t tmp = temparr[i];
diff --git a/ypsu.cpp b/ypsu.cpp
index dfb7207..eff4284 100644
--- a/ypsu.cpp
+++ b/ypsu.cpp
@@ -842,7 +842,8 @@ void measure_single(int n) {
 	std::vector v(n);
 	v = geninput(inputtype, n);
 	//measure(inputtype, "sp", [&] { spsort(&v[0], v.size()); });
-	measure(inputtype, "magyar", [&] { MagyarSort::sort(&v[0], v.size()); });
+	//measure(inputtype, "magyar", [&] { MagyarSort::sort(&v[0], v.size()); });
+	measure(inputtype, "thier2", [&] { do_thier2(&v[0], v.size()); });
 	for (auto r : results)
 		printf("%9.3fs", r.second);
 	puts("");
@@ -858,9 +859,9 @@ void measure_single(int n) {
 
 int main(void) {
 	//int n = 100000000;
-	//int n = 10000000;
+	int n = 10000000;
 	//int n = 5000000;
-	int n = 1000000;
+	//int n = 1000000;
 	//int n = 100000;
 	//int n = 20001;
 	//int n = 20000;
@@ -874,8 +875,8 @@ int main(void) {
 	printf("Sorting %d elements:\n\n", n);
 
 	// Uncomment this for profiling and alg!
-	//measure_single(n);
-	//return 0;
+	measure_single(n);
+	return 0;
 
 	for (auto inputtype : inputtypes) {
 		printf("%10s", inputtype.c_str());