finally again a real optimization and API for reusal - even faster for non-reused
This commit is contained in:
parent
3490201420
commit
a044787846
123
magyarsort.h
123
magyarsort.h
@ -216,17 +216,38 @@ namespace MagyarSort {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Sort the given array (in-place sorting) with the given size.
|
* Sort the given array (in-place sorting) with the given size.
|
||||||
*
|
*
|
||||||
* Rem.: If you use the VectorGiverWithReuse please remind yourself to Gc() it time-to-time!
|
* Rem.: If you use the VectorGiverWithReuse please remind yourself to Gc() it time-to-time!
|
||||||
*
|
*
|
||||||
|
* Beware: GC needs to happen on all threads that use us!
|
||||||
|
*
|
||||||
* @param arr The array to sort. Result will be in the same array - as sorted.
|
* @param arr The array to sort. Result will be in the same array - as sorted.
|
||||||
* @param size The length of the array.
|
* @param size The length of the array.
|
||||||
* @param VectorGiver is either VectorGiverHeap or VectorGiverWithReuse. Have Give(size_t size, ...) returning value or ref.
|
* @param REUSE OPTIONAL: When true, we reuse the array instead of always gettin' and releasin' from da heap.
|
||||||
|
* @param GC OPTIONAL: When true, we garbage collect memory from previous sorts if REUSE is true.
|
||||||
|
* @param GC_WITHOUT_SORT OPTIONAL: When true, we "just GC" but do not sort in case of GC is true.
|
||||||
*/
|
*/
|
||||||
//template<typename VectorGiver = VectorGiverWithReuse>
|
template<bool REUSE = false, bool GC = false, bool GC_WITHOUT_SORT = false>
|
||||||
inline void sort(uint32_t arr[], size_t size) noexcept {
|
inline void __attribute__((always_inline)) sort_impl(uint32_t arr[], size_t size) noexcept {
|
||||||
|
// Most funny optimization is this multiply here :-)
|
||||||
|
//
|
||||||
|
// Literally.. come on.. this makes it nearly a compile-time, macro-like
|
||||||
|
// ifdef-like thing as we avoid memory allocations of size BUT also we
|
||||||
|
// optimize the first call for sort when we REUSE the array so size is fine!
|
||||||
|
static thread_local std::vector<uint32_t> arc(size * REUSE);
|
||||||
|
|
||||||
|
// "Garbage-collection"
|
||||||
|
if(GC) {
|
||||||
|
arc = std::vector<uint32_t>();
|
||||||
|
// This must be implemented, because we can only access
|
||||||
|
// the static in our function body so this is the "way".
|
||||||
|
if(GC_WITHOUT_SORT) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Holds "digit" occurrences, prefix sums, whatevers
|
// Holds "digit" occurrences, prefix sums, whatevers
|
||||||
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
// First "DIGIT_RANGE" elem is for MSB "DIGITS", last is for LSB
|
||||||
static thread_local size_t radics[DIGITS * DIGIT_RANGE];
|
static thread_local size_t radics[DIGITS * DIGIT_RANGE];
|
||||||
@ -250,10 +271,19 @@ namespace MagyarSort {
|
|||||||
// above already anyways...
|
// above already anyways...
|
||||||
|
|
||||||
// Regular radix sort needs a copy, see: https://www.youtube.com/watch?v=ujb2CIWE8zY
|
// Regular radix sort needs a copy, see: https://www.youtube.com/watch?v=ujb2CIWE8zY
|
||||||
|
// But instead of the below, we do a trickery...
|
||||||
|
//
|
||||||
//std::vector<uint32_t> arc(size);
|
//std::vector<uint32_t> arc(size);
|
||||||
//auto arc = VectorGiver::Give(size); // "auto" is needed for this to perform well with some givers!
|
//auto arc = VectorGiver::Give(size); // "auto" is needed for this to perform well with some givers!
|
||||||
static thread_local std::vector<uint32_t> arc(size);
|
//
|
||||||
arc.resize(size); // JHP
|
// Rem.: The branch is optimized out in compile time!
|
||||||
|
if(REUSE) {
|
||||||
|
arc.resize(size);
|
||||||
|
} else {
|
||||||
|
// Must not be .clear() !!!
|
||||||
|
// We must regain memory of previous!
|
||||||
|
arc = std::move(std::vector<uint32_t>(size));
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t *from = arr;
|
uint32_t *from = arr;
|
||||||
uint32_t *to = &arc[0];
|
uint32_t *to = &arc[0];
|
||||||
@ -262,11 +292,90 @@ namespace MagyarSort {
|
|||||||
|
|
||||||
// With an other API we could spare this copy if we can delete original arr and return ptr or something...
|
// With an other API we could spare this copy if we can delete original arr and return ptr or something...
|
||||||
// I am fine with this... this is not my main idea anyways, just little ILP tweak to regular radix sort
|
// I am fine with this... this is not my main idea anyways, just little ILP tweak to regular radix sort
|
||||||
//if(to != arr) { // <- logically, but bad they are already swapped here!!! BEWARE
|
//if(to != arr) // <- logically, but bad they are already swapped here!!! BEWARE
|
||||||
if(from != arr) { // <- in reality this is what we want because of last swap happened anyways!
|
if(from != arr) { // <- in reality this is what we want because of last swap happened anyways!
|
||||||
memcpy(arr, from, size);
|
memcpy(arr, from, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Garbage collect reused data structures from last call.
|
||||||
|
*
|
||||||
|
* This is optimized and is a NO-OP if MAGYAR_SORT_DEFAULT_REUSE is not defined!
|
||||||
|
* - unless you use the FORCE! May it be with you if you need it.
|
||||||
|
*
|
||||||
|
* @param FORCE OPTIONAL: When true, the gc happens even if MAGYAR_SORT_DEFAULT_REUSE is not defined!
|
||||||
|
*/
|
||||||
|
template<bool FORCE = false>
|
||||||
|
inline void gc() noexcept {
|
||||||
|
if(FORCE) {
|
||||||
|
// Only GC-ing
|
||||||
|
MagyarSort::sort_impl<true, true, true>(nullptr, 0);
|
||||||
|
} else {
|
||||||
|
#ifdef MAGYAR_SORT_DEFAULT_REUSE
|
||||||
|
// Only GC-ing
|
||||||
|
MagyarSort::sort_impl<true, true, true>(nullptr, 0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sort the given array (in-place sorting) with the given size.
|
||||||
|
*
|
||||||
|
* Rem.: Please remind yourself to cc() from time-to-time!
|
||||||
|
* Rem.: Thread-safe to use!
|
||||||
|
*
|
||||||
|
* Beware: MagyarSort::gc<true>(); needs to happen on all threads that use this variant otherwise memory leaks away!
|
||||||
|
* Please mind the "true" template parameter that forces the GC even when sort by default not reuses...
|
||||||
|
*
|
||||||
|
* @param arr The array to sort. Result will be in the same array - as sorted.
|
||||||
|
* @param size The lenght of the array.
|
||||||
|
* @param GC OPTIONAL: When true, we garbage collect before this sort - so cached memory size will be "size" elems.
|
||||||
|
*/
|
||||||
|
template<bool GC = false>
|
||||||
|
inline void __attribute__((always_inline)) sort_reuse(uint32_t arr[], size_t size) noexcept {
|
||||||
|
// Reuse the temporary vectors across runs
|
||||||
|
// This results in much less heap allocations and much faster on gcc
|
||||||
|
// and also a bit faster on clang too.
|
||||||
|
MagyarSort::sort_impl<true>(arr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sort the given array (in-place sorting) with the given size.
|
||||||
|
*
|
||||||
|
* Rem.: Thread-safe to use!
|
||||||
|
*
|
||||||
|
* Beware: MagyarSort::gc(); needs to happen on all threads that use this variant otherwise memory leaks away!
|
||||||
|
*
|
||||||
|
* @param arr The array to sort. Result will be in the same array - as sorted.
|
||||||
|
* @param size The lenght of the array.
|
||||||
|
*/
|
||||||
|
inline void __attribute__((always_inline)) sort_no_reuse(uint32_t arr[], size_t size) noexcept {
|
||||||
|
// We use the heap once per every call...
|
||||||
|
// This is safer and we do not need garbage collecting
|
||||||
|
MagyarSort::sort_impl(arr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sort the given array (in-place sorting) with the given size.
|
||||||
|
*
|
||||||
|
* Rem.: If you use the VectorGiverWithReuse please remind yourself to Gc() it time-to-time!
|
||||||
|
*
|
||||||
|
* Beware: MagyarSort::gc(); should be called after "sort bursts" (consecutive fast sorts of when you need memory
|
||||||
|
* on all threads that use this variant otherwise memory leaks away as biggest sorted array keeps being in ram!
|
||||||
|
* This depends on the config #define MAGYAR_SORT_DEFAULT_REUSE is defined or not. Define and you get reuse
|
||||||
|
* and if you get reuse you can call multiple sorts with reused temporary buffers that you gc() afterwards!
|
||||||
|
*
|
||||||
|
* @param arr The array to sort. Result will be in the same array - as sorted.
|
||||||
|
* @param size The lenght of the array.
|
||||||
|
*/
|
||||||
|
inline void sort(uint32_t arr[], size_t size) noexcept {
|
||||||
|
#ifdef MAGYAR_SORT_DEFAULT_REUSE
|
||||||
|
MagyarSort::sort_reuse(arr, size);
|
||||||
|
#else
|
||||||
|
MagyarSort::sort_no_reuse(arr, size);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
#include "quicksort-all.cpp"
|
#include "quicksort-all.cpp"
|
||||||
#include "avx2-altquicksort.h"
|
#include "avx2-altquicksort.h"
|
||||||
//#include "avx2-nate-quicksort.cpp"
|
//#include "avx2-nate-quicksort.cpp"
|
||||||
|
#define MAGYAR_SORT_DEFAULT_REUSE
|
||||||
#include "../magyarsort.h" // mine
|
#include "../magyarsort.h" // mine
|
||||||
#include "avx2-natenodutch-quicksort.h"
|
#include "avx2-natenodutch-quicksort.h"
|
||||||
#define USE_RDTSC // undef to get measurments in seconds
|
#define USE_RDTSC // undef to get measurments in seconds
|
||||||
|
|||||||
Binary file not shown.
2
test.cpp
2
test.cpp
@ -32,6 +32,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <algorithm> // std::sort
|
#include <algorithm> // std::sort
|
||||||
|
|
||||||
|
#define MAGYAR_SORT_DEFAULT_REUSE
|
||||||
#include "magyarsort.h"
|
#include "magyarsort.h"
|
||||||
|
|
||||||
#ifdef SKA_SORT
|
#ifdef SKA_SORT
|
||||||
|
|||||||
4
ypsu.cpp
4
ypsu.cpp
@ -11,7 +11,11 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "ska_sort.hpp"
|
#include "ska_sort.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
#define MAGYAR_SORT_DEFAULT_REUSE
|
||||||
#include "magyarsort.h"
|
#include "magyarsort.h"
|
||||||
|
|
||||||
std::map<std::string, double> results;
|
std::map<std::string, double> results;
|
||||||
std::map<std::string, double> worst;
|
std::map<std::string, double> worst;
|
||||||
void measure(const std::string &inputtype, const std::string &name,
|
void measure(const std::string &inputtype, const std::string &name,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user