thiersort non-SSE implementation, but missing quicksort at end
This commit is contained in:
parent
caa3aecaad
commit
3c0b2d5202
284
thiersort.h
284
thiersort.h
@ -233,59 +233,273 @@
|
||||
* the float conversions AND shifts vectorized with SSE somewhat...
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#define TS_BOOL uint8_t
|
||||
#define TS_TRUE 1
|
||||
#define TS_FALSE 0
|
||||
|
||||
#ifdef TS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif /* TS_SSE2 */
|
||||
#ifdef TS_SSE2_NEON
|
||||
#include "sse2neon.h"
|
||||
#define TS_SSE2
|
||||
#endif /* TS_SSE2_NEON */
|
||||
|
||||
#ifndef TS_CUSTOM_TYPES
|
||||
#define TSBOOL uint8_t
|
||||
#define TSTRUE 1
|
||||
#define TSFALSE 0
|
||||
#define TSU8 uint8_t
|
||||
#define TSU32 uint32_t
|
||||
#define TSI32 int32_t
|
||||
#endif /* TS_CUSTOM_TYPES */
|
||||
|
||||
/* Nearly same alg for all these keys */
|
||||
union thier_key {
|
||||
int32_t i;
|
||||
uint32_t u;
|
||||
union tskey {
|
||||
TSI32 i;
|
||||
TSU32 u;
|
||||
float f;
|
||||
};
|
||||
|
||||
struct thier_elem {
|
||||
union thier_key key;
|
||||
uint32_t i;
|
||||
struct tselem {
|
||||
union tskey key;
|
||||
TSU32 i;
|
||||
};
|
||||
|
||||
/* Forward decl of internal code */
|
||||
static inline void thiersort8_internal(
|
||||
const thier_elem *arr,
|
||||
const tselem *arr,
|
||||
uint32_t length,
|
||||
const int (*compar)(
|
||||
const union thier_key ak,
|
||||
const union thier_key bk,
|
||||
const void *aiptr,
|
||||
const void *biptr,
|
||||
void* (*malloc)(size_t size),
|
||||
void (*free)(void *ptr),
|
||||
const TSBOOL (*lt)(
|
||||
const union tskey ak,
|
||||
const union tskey bk,
|
||||
const TSU32 ai,
|
||||
const TSU32 bi,
|
||||
void *reent_data),
|
||||
const TS_BOOL isint,
|
||||
const TS_BOOL isunsigned,
|
||||
const TS_BOOL isfloat);
|
||||
const TSBOOL isint,
|
||||
const TSBOOL isunsigned,
|
||||
const TSBOOL isfloat);
|
||||
|
||||
/** The '<' operation used for int32 */
|
||||
static inline const int ts_lt_int(
|
||||
const union tskey ak,
|
||||
const union tskey bk,
|
||||
const TSU32 ai,
|
||||
const TSU32 bi,
|
||||
void *reent_data) { return (ak.i < bk.i); }
|
||||
|
||||
/** The '<' operation used for unsigned int32 */
|
||||
static inline const int ts_lt_uint(
|
||||
const union tskey ak,
|
||||
const union tskey bk,
|
||||
const TSU32 ai,
|
||||
const TSU32 bi,
|
||||
void *reent_data) { return (ak.u < bk.u); }
|
||||
|
||||
/** The '<' operation used for float */
|
||||
static inline const int ts_lt_float(
|
||||
const union tskey ak,
|
||||
const union tskey bk,
|
||||
const TSU32 ai,
|
||||
const TSU32 bi,
|
||||
void *reent_data) { return (ak.f < bk.f); }
|
||||
|
||||
/**
|
||||
* Sort {key, index32} elements where key is 32 bit integer.
|
||||
*/
|
||||
static inline void thiersort8_intkey(
|
||||
const thier_elem *arr,
|
||||
uint32_t length) {
|
||||
const tselem *arr,
|
||||
uint32_t length,
|
||||
void* (*malloc)(size_t size),
|
||||
void (*free)(void *ptr)) {
|
||||
thiersort8_internal(
|
||||
arr,
|
||||
length,
|
||||
malloc,
|
||||
free,
|
||||
ts_lt_int,
|
||||
TSTRUE,
|
||||
TSFALSE,
|
||||
TSFALSE);
|
||||
}
|
||||
|
||||
/** Internal code reuse (branches shoul be optimized out) */
|
||||
static inline void thiersort8_internal(
|
||||
thier_elem *arr,
|
||||
const uint32_t length,
|
||||
const int (*compar)(
|
||||
const union thier_key ak,
|
||||
const union thier_key bk,
|
||||
const void *aiptr,
|
||||
const void *biptr,
|
||||
void *reent_data),
|
||||
const TS_BOOL isint,
|
||||
const TS_BOOL isunsigned,
|
||||
const TS_BOOL isfloat) {
|
||||
// TODO: implement
|
||||
/**
|
||||
* Sort {key, index32} elements where key is unsigned 32 bit integer.
|
||||
*/
|
||||
static inline void thiersort8_uintkey(
|
||||
const tselem *arr,
|
||||
uint32_t length,
|
||||
void* (*malloc)(size_t size),
|
||||
void (*free)(void *ptr)) {
|
||||
thiersort8_internal(
|
||||
arr,
|
||||
length,
|
||||
malloc,
|
||||
free,
|
||||
ts_lt_uint,
|
||||
TSFALSE,
|
||||
TSTRUE,
|
||||
TSFALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort {key, index32} elements where key is 32 bit integer.
|
||||
*/
|
||||
static inline void thiersort8_floatkey(
|
||||
const tselem *arr,
|
||||
uint32_t length,
|
||||
void* (*malloc)(size_t size),
|
||||
void (*free)(void *ptr)) {
|
||||
thiersort8_internal(
|
||||
arr,
|
||||
length,
|
||||
malloc,
|
||||
free,
|
||||
ts_lt_float,
|
||||
TSFALSE,
|
||||
TSFALSE,
|
||||
TSTRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Do sign-trick based radix transform on nom-2s-complement 8 bit float!
|
||||
*
|
||||
* -1 means -127
|
||||
* -2 means -126
|
||||
* ...
|
||||
* -127 means -0
|
||||
* 0 means +0
|
||||
* 1 means 1
|
||||
* ...
|
||||
* 127 means 127
|
||||
*
|
||||
* We convert the above (in this order) into [0..255]!
|
||||
*/
|
||||
static inline TSU8 ts_floatsigntrick(TSU8 floatsigned) {
|
||||
if((floatsigned & 0x80) == 0) {
|
||||
/* Positive */
|
||||
return (128 + floatsigned);
|
||||
} else {
|
||||
/* Negative */
|
||||
return (128 - (floatsigned & 0x80));
|
||||
}
|
||||
}
|
||||
|
||||
/** Gets which bucket (index) the given key should go */
|
||||
static inline TSU8 ts_radixi(
|
||||
union tskey key,
|
||||
const TSBOOL isint,
|
||||
const TSBOOL isunsigned,
|
||||
const TSBOOL isfloat) {
|
||||
/* Compiler should optimize out branch here! */
|
||||
union tskey k;
|
||||
if(isint) {
|
||||
/* Sign bit can be 1! */
|
||||
k.f = (float)(arr[i].i);
|
||||
|
||||
/* 8 bit float */
|
||||
k.u >> 24;
|
||||
|
||||
/* Need float sign trickery */
|
||||
return ts_floatsigntrick((TSU8)k.u);
|
||||
}
|
||||
if(isunsigned) {
|
||||
/* Sign bit CANNOT be 1! */
|
||||
k.f = (float)(arr[i].u);
|
||||
|
||||
/* 8 bit float */
|
||||
k.u >> 24;
|
||||
|
||||
/* We are fine! */
|
||||
return (TSU8)k.u;
|
||||
}
|
||||
if(isfloat) {
|
||||
/* Sign bit can be 1! */
|
||||
k.f = (float) (arr[i].f);
|
||||
|
||||
/* 8 bit float */
|
||||
k.u >> 24;
|
||||
|
||||
/* Need float sign trickery */
|
||||
return ts_floatsigntrick((TSU8)k.u);
|
||||
}
|
||||
}
|
||||
|
||||
/** Internal code reuse (type-branches should be optimized out) */
|
||||
static inline void thiersort8_internal(
|
||||
struct tselem *arr,
|
||||
const uint32_t length,
|
||||
void* (*malloc)(size_t size),
|
||||
void (*free)(void *ptr),
|
||||
const TSBOOL (*lt)(
|
||||
const union tskey ak,
|
||||
const union tskey bk,
|
||||
const TSU32 ai,
|
||||
const TSU32 bi,
|
||||
void *reent_data),
|
||||
const TSBOOL isint,
|
||||
const TSBOOL isunsigned,
|
||||
const TSBOOL isfloat) {
|
||||
|
||||
/* Preparation */
|
||||
uint32_t radics[256] = {0};
|
||||
/* [from, to) index: only where prefix sums change - usually nonfull */
|
||||
uint32_t real_radics[256 * 2] = {0};
|
||||
tselem *arr2 = malloc((sizeof struct tselem) * length);
|
||||
|
||||
/* Step 1) Bucketing */
|
||||
#ifdef TS_SSE2
|
||||
// TODO: implement with SSE2
|
||||
#else
|
||||
/* Occurence counting O(n) */
|
||||
for(TSU32 i = 0; i < length; ++i) {
|
||||
++radics[ts_radixi(arr[i].key, isint, isunsigned, isfloat)];
|
||||
}
|
||||
|
||||
/* Prefix sum + real radics calc O(256) */
|
||||
/* Radics: */
|
||||
/* fr: {10, 20, 10, 0, 5, 15,...} */
|
||||
/* to: {10, 30, 40, 40, 45, 60,..} */
|
||||
/* Real radics: */
|
||||
/* to: {[0, 10], [10, 30], [30, 40], [40, 45], [45, 60]} */
|
||||
/* 0. 1. 2. 4. 5. */
|
||||
/* (because radix value 3 is not found in input) */
|
||||
TSU32 prev = 0;
|
||||
TSU32 reali = 0;
|
||||
for(int i = 0; i < 256; ++i) {
|
||||
if(radics[i] != 0) {
|
||||
radics[i] += prev;
|
||||
real_radics[reali] = prev;
|
||||
real_radics[reali + 1] = radics[i];
|
||||
prev = radics[i];
|
||||
reali += 2;
|
||||
} else {
|
||||
radics[i] += prev;
|
||||
prev = radics[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Move elements according to their buckets O(n) */
|
||||
/* Right-to-left ensures keeping internal order */
|
||||
for(TSU32 i = length; i > 0; ++i) {
|
||||
TSU8 radi = ts_radixi(arr[i - 1].key, isint, isunsigned, isfloat);
|
||||
TSU32 offset = --radics[radi];
|
||||
arr2[offset] = arr[i - 1];
|
||||
}
|
||||
#endif /* TS_SSE2 */
|
||||
|
||||
/* Step 2) Sorting buckets contents - also moving back to original array */
|
||||
for(int i = 0; i < reali; i += 2) {
|
||||
/* inclusive */
|
||||
TSU32 from = real_radics[i];
|
||||
/* non-inclusive */
|
||||
TSU32 to = real_radics[i + 1];
|
||||
|
||||
/* TODO: Quicksort this bucket in O(mlogm), where m << n usually */
|
||||
}
|
||||
|
||||
/* Cleanup */
|
||||
free(arr2);
|
||||
}
|
||||
|
||||
#endif /* THIER_SORT_H */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user