Removed non-temporal writes: the write patterns are too random for them to help
This commit is contained in:
parent
a16917830f
commit
036725611b
@ -14,10 +14,6 @@
|
|||||||
#define TPBX3 9 // bottom
|
#define TPBX3 9 // bottom
|
||||||
#endif /* CUSTOM_TPBX_BITS */
|
#endif /* CUSTOM_TPBX_BITS */
|
||||||
|
|
||||||
#ifdef TPXB_USE_NON_TEMPORAL_WRITES
|
|
||||||
#include <emmintrin.h> /* Required for the intrinsic */
|
|
||||||
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
|
||||||
|
|
||||||
static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept {
|
static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) noexcept {
|
||||||
return (a <= b) ?
|
return (a <= b) ?
|
||||||
((a <= c) ? a : c) :
|
((a <= c) ? a : c) :
|
||||||
@ -105,11 +101,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
|
|||||||
auto offset = --bucket3[bkeyni];
|
auto offset = --bucket3[bkeyni];
|
||||||
|
|
||||||
// Add to the proper target location
|
// Add to the proper target location
|
||||||
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
|
|
||||||
buf[offset] = num;
|
buf[offset] = num;
|
||||||
#else
|
|
||||||
_mm_stream_si32((int*)(&buf[offset]), num);
|
|
||||||
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
|
||||||
}
|
}
|
||||||
// Mid digit buf->a
|
// Mid digit buf->a
|
||||||
// Iterate right-to-left so the order established by the already-sorted digits is preserved
|
// Iterate right-to-left so the order established by the already-sorted digits is preserved
|
||||||
@ -123,11 +115,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
|
|||||||
auto offset = --bucket2[bkeyni];
|
auto offset = --bucket2[bkeyni];
|
||||||
|
|
||||||
// Add to the proper target location
|
// Add to the proper target location
|
||||||
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
|
|
||||||
a[offset] = num;
|
a[offset] = num;
|
||||||
#else
|
|
||||||
_mm_stream_si32((int*)(&a[offset]), num);
|
|
||||||
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
|
||||||
}
|
}
|
||||||
// Top digit a->buf
|
// Top digit a->buf
|
||||||
// Iterate right-to-left so the order established by the already-sorted digits is preserved
|
// Iterate right-to-left so the order established by the already-sorted digits is preserved
|
||||||
@ -141,11 +129,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept {
|
|||||||
auto offset = --bucket1[bkeyni];
|
auto offset = --bucket1[bkeyni];
|
||||||
|
|
||||||
// Add to the proper target location
|
// Add to the proper target location
|
||||||
#ifndef TPXB_USE_NON_TEMPORAL_WRITES
|
|
||||||
buf[offset] = num;
|
buf[offset] = num;
|
||||||
#else
|
|
||||||
_mm_stream_si32((int*)(&buf[offset]), num);
|
|
||||||
#endif /* TPXB_USE_NON_TEMPORAL_WRITES */
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
4
ypsu.cpp
4
ypsu.cpp
@ -895,8 +895,8 @@ int main(int argc, char **argv) {
|
|||||||
printf("Sorting %d elements:\n\n", n);
|
printf("Sorting %d elements:\n\n", n);
|
||||||
|
|
||||||
// Uncomment this for profiling and alg!
|
// Uncomment this for profiling and alg!
|
||||||
//measure_single(n);
|
measure_single(n);
|
||||||
//return 0;
|
return 0;
|
||||||
|
|
||||||
for (auto inputtype : inputtypes) {
|
for (auto inputtype : inputtypes) {
|
||||||
printf("%10s", inputtype.c_str());
|
printf("%10s", inputtype.c_str());
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user