tpxb: tried non-temporal writes (bad for random writes)
This commit is contained in:
parent
22ec030116
commit
98222d4494
@ -14,6 +14,10 @@
|
|||||||
#define TPBX3 9 // bottom
|
#define TPBX3 9 // bottom
|
||||||
#endif /* CUSTOM_TPBX_BITS */
|
#endif /* CUSTOM_TPBX_BITS */
|
||||||
|
|
||||||
|
#ifdef TPXB_USE_NON_TEMPORAL_WRITES
|
||||||
|
#include <emmintrin.h> /* Required for the intrinsic */
|
||||||
|
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
||||||
|
|
||||||
static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept {
|
static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept {
|
||||||
return (a <= b) ?
|
return (a <= b) ?
|
||||||
((a <= c) ? a : c) :
|
((a <= c) ? a : c) :
|
||||||
@ -101,7 +105,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
|
|||||||
auto offset = --bucket3[bkeyni];
|
auto offset = --bucket3[bkeyni];
|
||||||
|
|
||||||
// Add to the proper target location
|
// Add to the proper target location
|
||||||
|
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
|
||||||
buf[offset] = num;
|
buf[offset] = num;
|
||||||
|
#else
|
||||||
|
_mm_stream_si32((int*)(&buf[offset]), num);
|
||||||
|
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
||||||
}
|
}
|
||||||
// Mid digit buf->a
|
// Mid digit buf->a
|
||||||
// right-to-left so that the order of already-sorted digits is kept across iterations
|
// right-to-left so that the order of already-sorted digits is kept across iterations
|
||||||
@ -115,7 +123,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
|
|||||||
auto offset = --bucket2[bkeyni];
|
auto offset = --bucket2[bkeyni];
|
||||||
|
|
||||||
// Add to the proper target location
|
// Add to the proper target location
|
||||||
|
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
|
||||||
a[offset] = num;
|
a[offset] = num;
|
||||||
|
#else
|
||||||
|
_mm_stream_si32((int*)(&a[offset]), num);
|
||||||
|
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
||||||
}
|
}
|
||||||
// Top digit a->buf
|
// Top digit a->buf
|
||||||
// right-to-left so that the order of already-sorted digits is kept across iterations
|
// right-to-left so that the order of already-sorted digits is kept across iterations
|
||||||
@ -129,7 +141,11 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
|
|||||||
auto offset = --bucket1[bkeyni];
|
auto offset = --bucket1[bkeyni];
|
||||||
|
|
||||||
// Add to the proper target location
|
// Add to the proper target location
|
||||||
|
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
|
||||||
buf[offset] = num;
|
buf[offset] = num;
|
||||||
|
#else
|
||||||
|
_mm_stream_si32((int*)(&buf[offset]), num);
|
||||||
|
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user