From 036725611ba64a58201fa33dd23ca5e07c6d32f9 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Wed, 1 Oct 2025 03:24:08 +0200 Subject: [PATCH] Remove non-temporal writes: the write pattern is too random for them to help --- threepass_xbit.h | 16 ---------------- ypsu.cpp | 4 ++-- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/threepass_xbit.h b/threepass_xbit.h index da05422..4298664 100644 --- a/threepass_xbit.h +++ b/threepass_xbit.h @@ -14,10 +14,6 @@ #define TPBX3 9 // bottom #endif /* CUSTOM_TPBX_BITS */ -#ifdef TPXB_USE_NON_TEMPORAL_WRITES -#include /* Required for the intrinsic */ -#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ - static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept { return (a <= b) ? ((a <= c) ? a : c) : @@ -105,11 +101,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { auto offset = --bucket3[bkeyni]; // Add to the proper target location -#ifndef TPXB_USE_NON_TEMPORAL_WRITES buf[offset] = num; -#else - _mm_stream_si32((int*)(&buf[offset]), num); -#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ } // Mid digit buf->a // right-to-left to ensure already sorted digits order we keep for iterations @@ -123,11 +115,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { auto offset = --bucket2[bkeyni]; // Add to the proper target location -#ifndef TPXB_USE_NON_TEMPORAL_WRITES a[offset] = num; -#else - _mm_stream_si32((int*)(&a[offset]), num); -#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ } // Top digit a->buf // right-to-left to ensure already sorted digits order we keep for iterations @@ -141,11 +129,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { auto offset = --bucket1[bkeyni]; // Add to the proper target location -#ifndef TPXB_USE_NON_TEMPORAL_WRITES buf[offset] = num; -#else - _mm_stream_si32((int*)(&buf[offset]), num); -#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ } } diff --git a/ypsu.cpp b/ypsu.cpp index e3afa07..57388ef 100644 
--- a/ypsu.cpp +++ b/ypsu.cpp @@ -895,8 +895,8 @@ int main(int argc, char **argv) { printf("Sorting %d elements:\n\n", n); // Uncomment this for profiling and alg! - //measure_single(n); - //return 0; + measure_single(n); + return 0; for (auto inputtype : inputtypes) { printf("%10s", inputtype.c_str());