diff --git a/thiersort3.h b/thiersort3.h index 383d430..ed80cc5 100644 --- a/thiersort3.h +++ b/thiersort3.h @@ -52,13 +52,13 @@ static inline uint32_t witch_bucket3(uint32_t key) { * @param n Number of elements in arr and temparr * @param rstate Create with sch_rand_state rstate = schwab_rand_state(junk_uint32_t); */ -static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { - int bucket[256]; /* Inclusive */ - int bucket_end[256]; /* Not inclusive */ +static inline void thiersort3(uint32_t *arr, uint32_t *temparr, uint32_t n) { + uint32_t bucket[256]; /* Inclusive */ + uint32_t bucket_end[256]; /* Not inclusive */ #ifndef NO_EXTRA_BIT - int bucket_leftend[256]; /* for extra 1bit split processing */ - int bucket_left[256]; /* for extra 1bit split processing */ + uint32_t bucket_leftend[256]; /* for extra 1bit split processing */ + uint32_t bucket_left[256]; /* for extra 1bit split processing */ #endif /* NO_EXTRA_BIT */ /* Check if need to sort at all - needed for invariants later */ @@ -68,25 +68,25 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { /* Count */ #pragma GCC unroll 64 - for(int i = 0; i < 256; ++i) { + for(uint32_t i = 0; i < 256; ++i) { bucket[i] = 0; } #pragma GCC unroll 128 - for(int i = 0; i < n; ++i) { + for(uint32_t i = 0; i < n; ++i) { ++bucket[witch_bucket3(arr[i])]; } /* Prefix sum (like in Magyarsort) */ uint32_t prev = 0; #pragma GCC unroll 4 - for (int i = 0; i < 256; i++) { + for (uint32_t i = 0; i < 256; i++) { bucket[i] += prev; prev = bucket[i]; } /* Save end-offsets */ #pragma GCC unroll 64 - for(int i = 0; i < 256; ++i) { + for(uint32_t i = 0; i < 256; ++i) { bucket_end[i] = bucket[i]; } @@ -95,7 +95,7 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { bucket_left[0] = 0; bucket_leftend[0] = 0; #pragma GCC unroll 64 - for(int i = 0; i < 4095; ++i) { + for(uint32_t i = 0; i < 255; ++i) { bucket_left[1 + i] = bucket[i]; bucket_leftend[1 + i] = bucket[i]; } @@ -106,18 +106,18 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { /* Move to the buckets */ /* Rem.: This also changes bucket[i] so they will point to bucket beginnings */ #pragma GCC unroll 128 - for(int i = 0; i < n; ++i) { + for(uint32_t i = 0; i < n; ++i) { uint32_t num = arr[i]; uint32_t witch = witch_bucket3(num); - int offset = (--bucket[witch]); + uint32_t offset = (--bucket[witch]); temparr[offset] = num; } /* temparr -> arr each bucket and sort them in-place */ #pragma GCC unroll 2 - for(int b = 0; b < 256; ++b) { - int begin = bucket[b]; - int end = bucket_end[b]; + for(uint32_t b = 0; b < 256; ++b) { + uint32_t begin = bucket[b]; + uint32_t end = bucket_end[b]; /* Ensure exists */ if(begin >= end) { @@ -137,10 +137,10 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { /* Rem.: This also changes bucket[i] so they will point to bucket.right beginnings */ /* Rem.: This also changes bucket_leftend[i] so they will point to bucket.left endings (needed in-process only) */ #pragma GCC unroll 64 - for(int i = 0; i < n; ++i) { + for(uint32_t i = 0; i < n; ++i) { uint32_t num = arr[i]; uint32_t witch = witch_bucket3(num); - int offset = (num & (1 << 27)) ? + uint32_t offset = (num & (1 << 27)) ? (--bucket[witch]) : (bucket_leftend[witch]++); @@ -149,12 +149,12 @@ static inline void thiersort3(uint32_t *arr, uint32_t *temparr, int n) { /* temparr -> arr each bucket and sort them in-place */ #pragma GCC unroll 2 - for(int b = 0; b < 256; ++b) { + for(uint32_t b = 0; b < 256; ++b) { assert(bucket_leftend[b] == bucket[b]); - int lbegin = bucket_left[b]; - int lend = bucket[b]; /* non-inclusive */ - int rbegin = bucket[b]; - int rend = bucket_end[b]; /* non-inclusive */ + uint32_t lbegin = bucket_left[b]; + uint32_t lend = bucket[b]; /* non-inclusive */ + uint32_t rbegin = bucket[b]; + uint32_t rend = bucket_end[b]; /* non-inclusive */ /* Ensure exists and process left part */ if(lbegin < lend) { diff --git a/threepass_xbit.h b/threepass_xbit.h index da05422..0bcbd34 100644 --- a/threepass_xbit.h +++ b/threepass_xbit.h @@ -31,17 +31,14 @@ static inline constexpr uint32_t min3u32_xb(uint32_t a, uint32_t b, uint32_t c) * @param buf Result array with the same size - result will be here * @param n The number of elements */ -static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { +static inline void threepass_xb(uint32_t *a, uint32_t *buf, uint32_t n) noexcept { assert(buf != NULL); - constexpr int shr1 = TPBX3 + TPBX2; - constexpr int shr2 = TPBX3; - constexpr int shr3 = 0; - constexpr int mask1 = (1 << TPBX1) - 1; - constexpr int mask2 = (1 << TPBX2) - 1; - constexpr int mask3 = (1 << TPBX3) - 1; - - /* helper buffers. */ - int sz = n * sizeof(a[0]); + constexpr uint32_t shr1 = TPBX3 + TPBX2; + constexpr uint32_t shr2 = TPBX3; + constexpr uint32_t shr3 = 0; + constexpr uint32_t mask1 = (1 << TPBX1) - 1; + constexpr uint32_t mask2 = (1 << TPBX2) - 1; + constexpr uint32_t mask3 = (1 << TPBX3) - 1; static thread_local uint32_t bucket1[1 << TPBX1]; memset(bucket1, 0, (1 << TPBX1) * sizeof(uint32_t)); @@ -67,7 +64,7 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { (1 << TPBX2), (1 << TPBX3) ); - int i = 0; + uint32_t i = 0; #pragma GCC unroll 8 for (; i < common; ++i) { bucket1[i] += prev1; @@ -78,17 +75,17 @@ static inline void threepass_xb(uint32_t *a, uint32_t *buf, int n) noexcept { prev3 = bucket3[i]; } /* Do remaining 1 */ - for (int j = i; j < (1 << TPBX1); ++j) { + for (uint32_t j = i; j < (1 << TPBX1); ++j) { bucket1[j] += prev1; prev1 = bucket1[j]; } /* Do remaining 2 */ - for (int j = i; j< (1 << TPBX2); ++j) { + for (uint32_t j = i; j< (1 << TPBX2); ++j) { bucket2[j] += prev2; prev2 = bucket2[j]; } /* Do remaining 3 */ - for (int j = i; j < (1 << TPBX3); ++j) { + for (uint32_t j = i; j < (1 << TPBX3); ++j) { bucket3[j] += prev3; prev3 = bucket3[j]; }