From 98222d449407a30482c48d5dddd1769242ba5a2c Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Wed, 1 Oct 2025 01:28:49 +0200 Subject: [PATCH] tpxb: tried non-temporal writes (bad for random writes) --- threepass_xbit.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/threepass_xbit.h b/threepass_xbit.h index 4298664..da05422 100644 --- a/threepass_xbit.h +++ b/threepass_xbit.h @@ -14,6 +14,10 @@ #define TPBX3 9 // bottom #endif /* CUSTOM_TPBX_BITS */ +#ifdef TPXB_USE_NON_TEMPORAL_WRITES +#include <emmintrin.h> /* Required for the intrinsic */ +#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ + static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept { return (a <= b) ? ((a <= c) ? a : c) : @@ -101,7 +105,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { auto offset = --bucket3[bkeyni]; // Add to the proper target location +#ifndef TPXB_USE_NON_TEMPORAL_WRITES buf[offset] = num; +#else + _mm_stream_si32((int*)(&buf[offset]), num); +#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ } // Mid digit buf->a // right-to-left to ensure already sorted digits order we keep for iterations @@ -115,7 +123,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { auto offset = --bucket2[bkeyni]; // Add to the proper target location +#ifndef TPXB_USE_NON_TEMPORAL_WRITES a[offset] = num; +#else + _mm_stream_si32((int*)(&a[offset]), num); +#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ } // Top digit a->buf // right-to-left to ensure already sorted digits order we keep for iterations @@ -129,7 +141,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { auto offset = --bucket1[bkeyni]; // Add to the proper target location +#ifndef TPXB_USE_NON_TEMPORAL_WRITES buf[offset] = num; +#else + _mm_stream_si32((int*)(&buf[offset]), num); +#endif /* TPXB_USE_NON_TEMPORAL_WRITES */ } }