some fixes for mormord-ilp-richi
This commit is contained in:
parent
23a5bb1d55
commit
b2d66b7fd0
33
ypsu.cpp
33
ypsu.cpp
@ -163,38 +163,49 @@ static inline uint32_t morgrab(uint32_t elem) noexcept {
|
||||
* @param radics1 A 128-sized array for occurrence counting the bottom partition.
|
||||
* @param radics2 A 128-sized array for occurrence counting the top partition.
|
||||
* @param DIGIT The digit in question (for a morgrab<DIGIT>(..) call)
|
||||
* @returns The partition boundaries - the non-inclusive inner ends of the partitions. Empty partitions are represented accordingly!
|
||||
* @returns The partition bounds are: [0..first) and [second..n) with logical means to mark empty partitions.
|
||||
*/
|
||||
template<int DIGIT>
|
||||
static inline std::pair<uint32_t, uint32_t> oc_bit_partition(
|
||||
uint32_t *a, uint32_t n, uint32_t *radics1, uint32_t *radics2) noexcept {
|
||||
// See Hoare's original quicksort for why
|
||||
uint32_t i = -1;
|
||||
uint32_t j = n;
|
||||
int64_t i = 0;
|
||||
int64_t j = n - 1;
|
||||
|
||||
while(true) {
|
||||
// Move past well-placed ones
|
||||
// And occurrence-count them
|
||||
// Rem.: In quicksort usually a do-while loop
|
||||
++i; while ((i < n) && !morbittop<DIGIT>(a[i])) {
|
||||
while ((i < j) && !morbittop<DIGIT>(a[i])) {
|
||||
++radics1[morgrab<DIGIT>(a[i])];
|
||||
++i;
|
||||
}
|
||||
--j; while ((0 < j) && morbittop<DIGIT>(a[j])) {
|
||||
while ((i < j) && morbittop<DIGIT>(a[j])) {
|
||||
++radics2[morgrab<DIGIT>(a[j])];
|
||||
--j;
|
||||
}
|
||||
|
||||
// If the indices crossed, return
|
||||
// Rem.: Not >= to ensure occ. counts! See also: (*)
|
||||
if(i > j) return std::make_pair(i, j);
|
||||
if(i > j) return std::make_pair(i, j + 1);
|
||||
|
||||
// Swap badly placed
|
||||
// Rem.: No need to occurrence-count here, as the loops above will handle it!
|
||||
// Check for swap
|
||||
if(i < j) {
|
||||
// Swap
|
||||
// No need to occurrence-count here, as the loops above will handle it!
|
||||
uint32_t tmp = a[i];
|
||||
a[i] = a[j];
|
||||
a[j] = tmp;
|
||||
} else {
|
||||
// i == j case: count the occurrence of the single remaining element properly.
|
||||
if(!morbittop<DIGIT>(a[j])) {
|
||||
++radics1[morgrab<DIGIT>(a[i])];
|
||||
++i;
|
||||
} else {
|
||||
++radics2[morgrab<DIGIT>(a[j])];
|
||||
--j;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -208,7 +219,7 @@ static inline void mormord_sort_impl(uint32_t *a, int n) noexcept {
|
||||
uint32_t real_radics2[128 * 2] = {0};
|
||||
|
||||
// Count occurrences and partition by topmost bit
|
||||
uint32_t n2 = oc_bit_partition<j>(a, n, radics1, radics2) + 1;
|
||||
std::pair<uint32_t, uint32_t> boundz = oc_bit_partition<j>(a, n, radics1, radics2);
|
||||
|
||||
/* Prefix sum + real radics calc O(256) */
|
||||
/* Radics: */
|
||||
@ -253,8 +264,8 @@ static inline void mormord_sort_impl(uint32_t *a, int n) noexcept {
|
||||
// Inplace swap, with added ILP / branchless opt.
|
||||
// Without it, it's data-dependent like crazy...
|
||||
uint32_t pivoti1 = 0;
|
||||
uint32_t pivoti2 = n2;
|
||||
while((pivoti1 < n2) && (pivoti2 < n)) {
|
||||
uint32_t pivoti2 = boundz.second;
|
||||
while((pivoti1 < boundz.first) && (pivoti2 < n)) { // FIXME: needs two more "finisher-loops" behind this!!!
|
||||
|
||||
/* Pivot 1 */
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user