magyarsort/top2_partition.c
2025-05-09 01:10:12 +02:00

84 lines
2.0 KiB
C

#include <stdio.h>
#include <stdint.h>
#define TOP2(x) ((x) >> 30)
void swap(uint32_t* a, uint32_t* b) {
uint32_t tmp = *a;
*a = *b;
*b = tmp;
}
// TODO: instead of swaps, we need a single tmp var + only memcpys!!!
// TODO: we can do "ILP-memcpy": key from b2->b3, value from b2->b3, key from b1->b2, value from b1...
// Rem.: The latter is faster likely than calling memcpy function even though its simd-optimized...
void partition_top2(uint32_t* arr, int n) {
int b0 = 0, b1 = 0, b2 = 0, b3 = 0;
while (b3 < n) {
uint32_t val = arr[b3];
uint32_t top = TOP2(val);
if (top == 0) {
// Cascade: swap into b2, b1, b0
swap(&arr[b3], &arr[b2]);
swap(&arr[b2], &arr[b1]);
swap(&arr[b1], &arr[b0]);
b0++; b1++; b2++;
} else if (top == 1) {
// Cascade: swap into b2, b1
swap(&arr[b3], &arr[b2]);
swap(&arr[b2], &arr[b1]);
b1++; b2++;
} else if (top == 2) {
// Swap into b2
swap(&arr[b3], &arr[b2]);
b2++;
}
// else (top == 3), do nothing
b3++;
}
}
int main() {
uint32_t arr[] = {
0x40000001, // top 2 bits = 01
0x00000002, // 00
0xC0000003, // 11
0x80000004, // 10
0x00000005, // 00
0x40000006, // 01
0xC0000007, // 11
0x80000008, // 10
};
int n = sizeof(arr) / sizeof(arr[0]);
printf("Before:\n");
for (int i = 0; i < n; ++i)
printf("0x%08X ", arr[i]);
printf("\n");
// Optional: Show top 2 bits for verification
printf("\nTop 2 bits:\n");
for (int i = 0; i < n; ++i)
printf("%u ", TOP2(arr[i]));
printf("\n");
partition_top2(arr, n);
printf("\nAfter:\n");
for (int i = 0; i < n; ++i)
printf("0x%08X ", arr[i]);
printf("\n");
// Optional: Show top 2 bits for verification
printf("\nTop 2 bits:\n");
for (int i = 0; i < n; ++i)
printf("%u ", TOP2(arr[i]));
printf("\n");
return 0;
}