vsort version that got slower, but is really funny template code
This commit is contained in:
parent
bff96c8f7f
commit
fd35dbc51b
57
ypsu.cpp
57
ypsu.cpp
@ -183,14 +183,59 @@
|
||||
for (int i = 0; i < n; i++) a[bucketdata[buf[i] >> 24 & 0xff]++] = buf[i];
|
||||
free(buf);
|
||||
}
|
||||
|
||||
/**
 * Compile-time chain of per-digit bucket vectors.
 *
 * VecInitMagic<DIGIT> derives from VecInitMagic<DIGIT - 1>, so instantiating
 * VecInitMagic<N> provides one thread-local vector `v` for every digit in
 * 0..N. Constructing an instance runs the constructors of the whole chain,
 * and each of them empties its own bucket.
 */
template<int DIGIT>
struct VecInitMagic : public VecInitMagic<DIGIT - 1> {
    // Starts out holding 256 elements so that the clear() in the constructor
    // leaves an empty vector that already has room for 256 values (acts like
    // a static reserve). The count constructor must be spelled with
    // parentheses: `v {256}` would select the initializer_list constructor
    // and create a single element with the value 256 instead.
    inline static thread_local std::vector<uint32_t> v = std::vector<uint32_t>(256);

    inline __attribute__((always_inline)) VecInitMagic() noexcept
        : VecInitMagic<DIGIT - 1>() {
        // Drops the placeholder elements (capacity is kept) and also resets
        // the bucket whenever a new instance is constructed later on.
        v.clear();
    }
};

/** Ends the template recursion: the base below digit 0 owns no bucket. */
template<>
struct VecInitMagic<-1> {};
|
||||
|
||||
/** Recursive Functor: no class should be generated I think (compiler should be smart) */
|
||||
template<int VI>
|
||||
struct VecAccMagic : public VecAccMagic<VI - 1> {
|
||||
inline __attribute__((always_inline)) VecAccMagic(int i) noexcept
|
||||
: VecAccMagic<VI -1 >(i) {
|
||||
if(i != VI) {
|
||||
// Needed otherwise bunch
|
||||
// of brand mispredicts can
|
||||
// happen because this should
|
||||
// be the common case, not the
|
||||
// one when we find the vector!
|
||||
return;
|
||||
} else {
|
||||
this->foundVec = &(VecInitMagic<VI>::v);
|
||||
}
|
||||
}
|
||||
};
|
||||
/** Ends template recursion */
|
||||
template<>
|
||||
struct VecAccMagic<-1> {
|
||||
static thread_local std::vector<uint32_t> NotFound;
|
||||
std::vector<uint32_t> *foundVec;
|
||||
inline VecAccMagic(int i) noexcept: foundVec() {}
|
||||
};
|
||||
|
||||
void vsort(uint32_t *a, int n) {
|
||||
thread_local std::vector<uint32_t> bts[256];
|
||||
static thread_local VecInitMagic<255> bts;
|
||||
for (int shift = 0; shift < 32; shift += 8) {
|
||||
for (int i = 0; i < n; i++) bts[a[i] >> shift & 0xff].push_back(a[i]);
|
||||
for (int bt = 0, k = 0; bt < 256; bt++) {
|
||||
memcpy(a + k, &bts[bt][0], bts[bt].size() * sizeof(a[0]));
|
||||
k += bts[bt].size();
|
||||
bts[bt].clear();
|
||||
for (int i = 0; i < n; i++) {
|
||||
VecAccMagic<255> vba(a[i] >> shift & 0xff);
|
||||
auto &bt = vba.foundVec;
|
||||
bt->push_back(a[i]);
|
||||
}
|
||||
|
||||
for (int bti = 0, k = 0; bti < 256; bti++) {
|
||||
VecAccMagic<255> vba(bti);
|
||||
auto &bt = vba.foundVec;
|
||||
memcpy(a + k, &((*bt)[0]), bt->size() * sizeof(a[0]));
|
||||
k += bt->size();
|
||||
bt->clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user