Took out prefetch and added commented-out pragmas - they do not help

This commit is contained in:
Richard Thier 2021-12-17 22:09:35 +01:00
parent 3fdcaad537
commit be450086b5

View File

@ -9,6 +9,12 @@
* LICENCE: CC3 - look it up, you need to mention me but that is all
*/
/*
* Does not help much:
// #pragma GCC target ("avx2")
// #pragma GCC optimize ("unroll-loops") // NOTE: GCC spells this pragma "optimize"; "optimization" is not a recognized GCC pragma
*/
#include <cstdio>
#include <cstdint>
#include <cstring> // memset
@ -90,8 +96,7 @@ namespace MagyarSort {
#pragma GCC unroll 64
for(COUNTER_TYP i = 0; i < size; ++i) {
// Prefetch caches
__builtin_prefetch(&arr[i]); // TODO: is good?
__builtin_prefetch(&arr[i + 64]);
//__builtin_prefetch(&arr[i + 64]);
// Creates no object, struct is empty
OccurenceMagic<DIGITS - 1, COUNTER_TYP>(arr, i, radicsOut);
}
@ -143,8 +148,10 @@ namespace MagyarSort {
#pragma GCC unroll 64
for(COUNTER_TYP i = size; i > 0; --i) { // right-to-left to ensure already sorted digits order we keep for iterations
// Prefetch caches
/*
__builtin_prefetch(&from[i]); // TODO: is good?
if(i >= 64) { __builtin_prefetch(&from[i - 64]); } // TODO: manually unroll?
*/
// Get num and its new offset / location
auto num = from[i - 1];
auto digVal = getDigit<DIGIT>(num);