Minor tweaks for more ILP (instruction-level parallelism)

This commit is contained in:
Richard Thier 2021-12-13 03:48:17 +01:00
parent c2fc962766
commit 11ceee29a1

View File

@ -85,6 +85,7 @@ namespace MagyarSort {
};
static inline void countOccurences(uint32_t arr[], size_t size, size_t *radicsOut) noexcept {
#pragma GCC unroll 64
for(size_t i = 0; i < size; ++i) {
// Creates no object, struct is empty
OccurenceMagic<DIGITS - 1>(arr, i, radicsOut);
@ -127,12 +128,13 @@ namespace MagyarSort {
/** Recursive functor: no actual object should be generated for it, I think (the compiler should be smart enough to optimize it away) */
template<int DIGIT>
struct RadixMagic : public RadixMagic<DIGIT - 1> {
inline RadixMagic(size_t *radics, uint32_t *&from, uint32_t *&to, size_t size) noexcept // BEWARE: "*&" needed to swap pointers..
inline __attribute__((always_inline)) RadixMagic(size_t *radics, uint32_t *&from, uint32_t *&to, size_t size) noexcept // BEWARE: "*&" needed to swap pointers..
: RadixMagic<DIGIT - 1>(radics, from, to, size) {
// DEBUG
//printf("%d before: ", DIGIT);
//debugArr(from, size);
#pragma GCC unroll 64
for(size_t i = size; i > 0; --i) { // right-to-left to ensure already sorted digits order we keep for iterations
// Get num and its new offset / location
auto num = from[i - 1];