Revert "vsort version that got slower, but is really funny template code"
This reverts commit fd35dbc51b63fa97ff5a9d7a823cdfa271b99a43.
This commit is contained in:
parent
fd35dbc51b
commit
a947cda58d
57
ypsu.cpp
57
ypsu.cpp
@ -183,59 +183,14 @@
|
|||||||
for (int i = 0; i < n; i++) a[bucketdata[buf[i] >> 24 & 0xff]++] = buf[i];
|
for (int i = 0; i < n; i++) a[bucketdata[buf[i] >> 24 & 0xff]++] = buf[i];
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int DIGIT>
|
|
||||||
struct VecInitMagic : public VecInitMagic<DIGIT - 1> {
|
|
||||||
inline static thread_local std::vector<uint32_t> v {256}; // like a static v.reserve call because: *
|
|
||||||
inline __attribute__((always_inline)) VecInitMagic() noexcept
|
|
||||||
: VecInitMagic<DIGIT -1 >() {
|
|
||||||
v.clear(); // * - but also needed for subsequent calls
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<>
|
|
||||||
struct VecInitMagic<-1> {};
|
|
||||||
|
|
||||||
/** Recursive Functor: no class should be generated I think (compiler should be smart) */
|
|
||||||
template<int VI>
|
|
||||||
struct VecAccMagic : public VecAccMagic<VI - 1> {
|
|
||||||
inline __attribute__((always_inline)) VecAccMagic(int i) noexcept
|
|
||||||
: VecAccMagic<VI -1 >(i) {
|
|
||||||
if(i != VI) {
|
|
||||||
// Needed, otherwise a bunch
|
|
||||||
// of branch mispredicts can
|
|
||||||
// happen because this should
|
|
||||||
// be the common case, not the
|
|
||||||
// one when we find the vector!
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
this->foundVec = &(VecInitMagic<VI>::v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
/** Ends template recursion */
|
|
||||||
template<>
|
|
||||||
struct VecAccMagic<-1> {
|
|
||||||
static thread_local std::vector<uint32_t> NotFound;
|
|
||||||
std::vector<uint32_t> *foundVec;
|
|
||||||
inline VecAccMagic(int i) noexcept: foundVec() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
void vsort(uint32_t *a, int n) {
|
void vsort(uint32_t *a, int n) {
|
||||||
static thread_local VecInitMagic<255> bts;
|
thread_local std::vector<uint32_t> bts[256];
|
||||||
for (int shift = 0; shift < 32; shift += 8) {
|
for (int shift = 0; shift < 32; shift += 8) {
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) bts[a[i] >> shift & 0xff].push_back(a[i]);
|
||||||
VecAccMagic<255> vba(a[i] >> shift & 0xff);
|
for (int bt = 0, k = 0; bt < 256; bt++) {
|
||||||
auto &bt = vba.foundVec;
|
memcpy(a + k, &bts[bt][0], bts[bt].size() * sizeof(a[0]));
|
||||||
bt->push_back(a[i]);
|
k += bts[bt].size();
|
||||||
}
|
bts[bt].clear();
|
||||||
|
|
||||||
for (int bti = 0, k = 0; bti < 256; bti++) {
|
|
||||||
VecAccMagic<255> vba(bti);
|
|
||||||
auto &bt = vba.foundVec;
|
|
||||||
memcpy(a + k, &((*bt)[0]), bt->size() * sizeof(a[0]));
|
|
||||||
k += bt->size();
|
|
||||||
bt->clear();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user