tpxb: tried non-temporal writes, opt-in via TPXB_USE_NON_TEMPORAL_WRITES (bad for random writes, so plain stores stay the default)

This commit is contained in:
Richard Thier 2025-10-01 01:28:49 +02:00
parent 22ec030116
commit 98222d4494

View File

@ -14,6 +14,10 @@
#define TPBX3 9 // bottom #define TPBX3 9 // bottom
#endif /* CUSTOM_TPBX_BITS */ #endif /* CUSTOM_TPBX_BITS */
#ifdef TPXB_USE_NON_TEMPORAL_WRITES
#include <emmintrin.h> /* Required for the intrinsic */
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept { static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept {
return (a <= b) ? return (a <= b) ?
((a <= c) ? a : c) : ((a <= c) ? a : c) :
@ -101,7 +105,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
auto offset = --bucket3[bkeyni]; auto offset = --bucket3[bkeyni];
// Add to the proper target location // Add to the proper target location
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
buf[offset] = num; buf[offset] = num;
#else
_mm_stream_si32((int*)(&buf[offset]), num);
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
} }
// Mid digit buf->a // Mid digit buf->a
// right-to-left to ensure already sorted digits order we keep for iterations // right-to-left to ensure already sorted digits order we keep for iterations
@ -115,7 +123,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
auto offset = --bucket2[bkeyni]; auto offset = --bucket2[bkeyni];
// Add to the proper target location // Add to the proper target location
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
a[offset] = num; a[offset] = num;
#else
_mm_stream_si32((int*)(&a[offset]), num);
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
} }
// Top digit a->buf // Top digit a->buf
// right-to-left to ensure already sorted digits order we keep for iterations // right-to-left to ensure already sorted digits order we keep for iterations
@ -129,7 +141,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
auto offset = --bucket1[bkeyni]; auto offset = --bucket1[bkeyni];
// Add to the proper target location // Add to the proper target location
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
buf[offset] = num; buf[offset] = num;
#else
_mm_stream_si32((int*)(&buf[offset]), num);
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
} }
} }