some fixes for mormord-ilp-richi

This commit is contained in:
Richard Thier 2024-04-12 00:37:50 +02:00
parent 23a5bb1d55
commit b2d66b7fd0

View File

@ -163,38 +163,49 @@ static inline uint32_t morgrab(uint32_t elem) noexcept {
* @param radics1 A 128-sized array for occurrence counting the bottom partition. * @param radics1 A 128-sized array for occurrence counting the bottom partition.
* @param radics2 A 128-sized array for occurrence counting the top partition. * @param radics2 A 128-sized array for occurrence counting the top partition.
* @param DIGIT The digit in question (for a morgrab<DIGIT>(..) call) * @param DIGIT The digit in question (for a morgrab<DIGIT>(..) call)
* @returns The partition boundaries - non-inclusive inner ends partitions. Empty partitions accordingly represented! * @returns The partition bounds are: [0..first) and [second..n) with logical means to mark empty partitions.
*/ */
template<int DIGIT> template<int DIGIT>
static inline std::pair<uint32_t, uint32_t> oc_bit_partition( static inline std::pair<uint32_t, uint32_t> oc_bit_partition(
uint32_t *a, uint32_t n, uint32_t *radics1, uint32_t *radics2) noexcept { uint32_t *a, uint32_t n, uint32_t *radics1, uint32_t *radics2) noexcept {
// See Hoare's OG quicksort why // See Hoare's OG quicksort why
uint32_t i = -1; int64_t i = 0;
uint32_t j = n; int64_t j = n - 1;
while(true) { while(true) {
// Move past well-placed ones // Move past well-placed ones
// And occurence count them // And occurence count them
// Rem.: In quicksort usually a do-while loop // Rem.: In quicksort usually a do-while loop
++i; while ((i < n) && !morbittop<DIGIT>(a[i])) { while ((i < j) && !morbittop<DIGIT>(a[i])) {
++radics1[morgrab<DIGIT>(a[i])]; ++radics1[morgrab<DIGIT>(a[i])];
++i; ++i;
} }
--j; while ((0 < j) && morbittop<DIGIT>(a[j])) { while ((i < j) && morbittop<DIGIT>(a[j])) {
++radics2[morgrab<DIGIT>(a[j])]; ++radics2[morgrab<DIGIT>(a[j])];
--j; --j;
} }
// If the indices crossed, return // If the indices crossed, return
// Rem.: Not >= to ensure occ. counts! See also: (*) // Rem.: Not >= to ensure occ. counts! See also: (*)
if(i > j) return std::make_pair(i, j); if(i > j) return std::make_pair(i, j + 1);
// Swap badly placed // Check for swap
// Rem.: No need occurence count here as above loops will handle!
if(i < j) { if(i < j) {
// Swap
// No need occurence count here as above loops will handle!
uint32_t tmp = a[i]; uint32_t tmp = a[i];
a[i] = a[j]; a[i] = a[j];
a[j] = tmp; a[j] = tmp;
} else {
// i == j case: count occurence properly for the one.
if(!morbittop<DIGIT>(a[j])) {
++radics1[morgrab<DIGIT>(a[i])];
++i;
} else {
++radics2[morgrab<DIGIT>(a[j])];
--j;
}
} }
} }
} }
@ -208,7 +219,7 @@ static inline void mormord_sort_impl(uint32_t *a, int n) noexcept {
uint32_t real_radics2[128 * 2] = {0}; uint32_t real_radics2[128 * 2] = {0};
// Count occurences and partition by topmost bit // Count occurences and partition by topmost bit
uint32_t n2 = oc_bit_partition<j>(a, n, radics1, radics2) + 1; std::pair<uint32_t, uint32_t> boundz = oc_bit_partition<j>(a, n, radics1, radics2);
/* Prefix sum + real radics calc O(256) */ /* Prefix sum + real radics calc O(256) */
/* Radics: */ /* Radics: */
@ -253,8 +264,8 @@ static inline void mormord_sort_impl(uint32_t *a, int n) noexcept {
// Inplace swap, with added ILP / branchless opt. // Inplace swap, with added ILP / branchless opt.
// Without it its data dependent like crazy... // Without it its data dependent like crazy...
uint32_t pivoti1 = 0; uint32_t pivoti1 = 0;
uint32_t pivoti2 = n2; uint32_t pivoti2 = boundz.second;
while((pivoti1 < n2) && (pivoti2 < n)) { while((pivoti1 < boundz.first) && (pivoti2 < n)) { // FIXME: needs two more "finisher-loops" behind this!!!
/* Pivot 1 */ /* Pivot 1 */