Removed non-temporal writes, as the write patterns are too random for them

This commit is contained in:
Richard Thier 2025-10-01 03:24:08 +02:00
parent a16917830f
commit 036725611b
2 changed files with 2 additions and 18 deletions

View File

@ -14,10 +14,6 @@
#define TPBX3 9 // bottom #define TPBX3 9 // bottom
#endif /* CUSTOM_TPBX_BITS */ #endif /* CUSTOM_TPBX_BITS */
#ifdef TPXB_USE_NON_TEMPORAL_WRITES
#include <emmintrin.h> /* Required for the intrinsic */
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept { static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept {
return (a <= b) ? return (a <= b) ?
((a <= c) ? a : c) : ((a <= c) ? a : c) :
@ -105,11 +101,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
auto offset = --bucket3[bkeyni]; auto offset = --bucket3[bkeyni];
// Add to the proper target location // Add to the proper target location
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
buf[offset] = num; buf[offset] = num;
#else
_mm_stream_si32((int*)(&buf[offset]), num);
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
} }
// Mid digit buf->a // Mid digit buf->a
// right-to-left to ensure already sorted digits order we keep for iterations // right-to-left to ensure already sorted digits order we keep for iterations
@ -123,11 +115,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
auto offset = --bucket2[bkeyni]; auto offset = --bucket2[bkeyni];
// Add to the proper target location // Add to the proper target location
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
a[offset] = num; a[offset] = num;
#else
_mm_stream_si32((int*)(&a[offset]), num);
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
} }
// Top digit a->buf // Top digit a->buf
// right-to-left to ensure already sorted digits order we keep for iterations // right-to-left to ensure already sorted digits order we keep for iterations
@ -141,11 +129,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
auto offset = --bucket1[bkeyni]; auto offset = --bucket1[bkeyni];
// Add to the proper target location // Add to the proper target location
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
buf[offset] = num; buf[offset] = num;
#else
_mm_stream_si32((int*)(&buf[offset]), num);
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
} }
} }

View File

@ -895,8 +895,8 @@ int main(int argc, char **argv) {
printf("Sorting %d elements:\n\n", n); printf("Sorting %d elements:\n\n", n);
// Uncomment this for profiling and alg! // Uncomment this for profiling and alg!
//measure_single(n); measure_single(n);
//return 0; return 0;
for (auto inputtype : inputtypes) { for (auto inputtype : inputtypes) {
printf("%10s", inputtype.c_str()); printf("%10s", inputtype.c_str());