further optimization - the API change, however, is not a no-cost abstraction: it makes clang slower than the original heap variant, and g++, albeit faster than the original, is not as fast as the hardcoded version - will investigate the API change

This commit is contained in:
Richard Thier 2021-12-15 00:43:25 +01:00
parent a1d6e96f5a
commit 3490201420

View File

@ -161,7 +161,71 @@ namespace MagyarSort {
/* SORT */
/** Sort the given array (in-place sorting) with the given size */
/**
* Example: A simple "vector-giver" which provides a static thread_local that is reused
*
* This is to be used when you will call sort many times successively!
* If you forget to garbage-collect manually, use a VectorGiverHeap.
*
* XXX - BEWARE: This gives references - that is also acceptable and supported!
*
* This is thread-safe (the Heap one also).
*/
struct VectorGiverWithReuse {
    /**
     * Give a reference to the per-thread vector to use as temporary storage.
     *
     * The vector is a function-local `static thread_local`, so every thread has its own
     * instance and that instance is reused between calls. It is resized to `s` on each call,
     * which means it keeps the memory of the biggest sort done so far until it is
     * garbage-collected (see Gc()).
     *
     * Rem.: The same vector is handed out on every call from a given thread, so a later
     *       Give() or Gc() changes what previously returned references see.
     * Rem.: This function is noexcept, so a failed allocation terminates the program.
     *
     * @param s The returned vector will have exactly this size.
     * @param gc OPTIONAL: When true, the shared vector is first replaced by a new empty one,
     *           dropping the old buffer. This saves memory after a big sort!
     * @returns A reference to the thread-local vector (no copy is made).
     */
    static inline __attribute__((always_inline)) std::vector<uint32_t> &Give(size_t s, const bool gc = false) noexcept {
        // Constructing with the size saves a separate growth step on the very first call.
        static thread_local std::vector<uint32_t> arc(s);
        // With the default gc = false this branch is expected to fold away after inlining.
        if(gc) { arc = std::vector<uint32_t>(); }
        // Needed on every call: the static initializer above only runs once per thread.
        arc.resize(s);
        // Safe to return by reference: thread_local storage outlives this call.
        return arc;
    }

    /**
     * Release the memory held by the calling thread's shared vector.
     * After this, the first sort will need memory from the heap again.
     *
     * Static like Give(), so it can be called as VectorGiverWithReuse::Gc() without
     * creating an instance; calling it through an instance still works.
     */
    static inline __attribute__((always_inline)) void Gc() noexcept {
        VectorGiverWithReuse::Give(0, true);
    }
};
/**
* Example: A simple "vector-giver" which provides new vector from heap.
*
* This is thread-safe (the VectorGiverWithReuse one also).
*/
struct VectorGiverHeap {
    /**
     * Give a temporary vector which is freshly allocated on every call and is freed
     * when the caller's copy goes out of scope (e.g. after the sort).
     *
     * XXX - BEWARE: Please mind we do not return a reference, but a value here!
     *               Returning the temporary directly lets the compiler construct it
     *               in the caller's storage (copy elision), so no extra copy is made.
     *
     * Static, so it can be used the same way as VectorGiverWithReuse::Give(),
     * i.e. as VectorGiver::Give(size); calling it through an instance still works.
     *
     * Rem.: This function is noexcept, so a failed allocation terminates the program.
     *
     * @param s The given vector should have this size.
     * @returns A zero-initialized vector of the requested size.
     */
    static inline __attribute__((always_inline)) std::vector<uint32_t> Give(size_t s) noexcept {
        return std::vector<uint32_t>(s);
    }
};
/**
* Sort the given array (in-place sorting) with the given size.
*
* Rem.: If you use the VectorGiverWithReuse, remember to Gc() it from time to time!
*
* @param arr The array to sort. Result will be in the same array - as sorted.
* @param size The length of the array.
* @param VectorGiver is either VectorGiverHeap or VectorGiverWithReuse. Have Give(size_t size, ...) returning value or ref.
*/
//template<typename VectorGiver = VectorGiverWithReuse>
inline void sort(uint32_t arr[], size_t size) noexcept {
// Holds "digit" occurences, prefix sums, whatevers
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
@ -186,7 +250,10 @@ namespace MagyarSort {
// above already anyways...
// Regular radix sort needs a copy, see: https://www.youtube.com/watch?v=ujb2CIWE8zY
std::vector<uint32_t> arc(size);
//std::vector<uint32_t> arc(size);
//auto arc = VectorGiver::Give(size); // "auto" is needed for this to perform well with some givers!
static thread_local std::vector<uint32_t> arc(size);
arc.resize(size); // JHP
uint32_t *from = arr;
uint32_t *to = &arc[0];