From 0a5204c1cc1482d555af269cdaf2980c5f76492d Mon Sep 17 00:00:00 2001
From: Richard Thier
Date: Tue, 1 Apr 2025 20:01:12 +0200
Subject: [PATCH] restrict keyword added and perf.cpp added

---
NOTE(review): this patch was re-flowed from a whitespace-collapsed copy in
which every <...> token had been stripped. Restored in perf.cpp: the standard
includes and the duration_cast<std::chrono::nanoseconds> arguments (implied by
the "* 1e-6 ... ms" conversion). Not recoverable from that copy, so verify
against upstream before applying: the author e-mail on the From: line, the
header named on the fastrand.h "#include" context line, the exact indentation
(tabs vs. spaces), and the blob id on the perf.cpp index line.
Content changes in perf.cpp: the second benchmark now loops M times so that it
matches the case count it prints, and the unused "choice" local was removed.

 fastrand.h | 15 ++++++++--
 perf.cpp   | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 2 deletions(-)
 create mode 100644 perf.cpp

diff --git a/fastrand.h b/fastrand.h
index e1d5c9d..9d9e9df 100644
--- a/fastrand.h
+++ b/fastrand.h
@@ -8,6 +8,17 @@
 #include
 #endif /* NO_CSTDLIB */
 
+#ifdef __cplusplus
+    // C++-specific logic
+    #if defined(__GNUC__) || defined(__clang__)
+        #define restrict __restrict__ // GCC/Clang
+    #elif defined(_MSC_VER)
+        #define restrict __restrict // MSVC
+    #else
+        #error "Compiler not supported for 'restrict' keyword in C++"
+    #endif
+#endif
+
 /* Currently a single integer is enough */
 typedef uint32_t rand_state;
 
@@ -30,7 +41,7 @@ static inline uint32_t lcg(rand_state *state) {
 }
 
 /** Pick a "reasonably random" number in [0, until-1] without modulus */
-static inline uint32_t rand_until(rand_state *state, uint32_t until) {
+static inline uint32_t rand_until(rand_state *restrict state, uint32_t until) {
     uint32_t rand = lcg(state);
     // Multiply by "until", take the upper 32 bits of the 64-bit result
     return (uint32_t)(((uint64_t)rand * until) >> 32);
@@ -44,7 +55,7 @@ static inline uint32_t rand_until(rand_state *state, uint32_t until) {
  * @param to The biggest possible value + 1
  * @returns A value in [from, to) interval
  */
-static inline uint32_t rand_between(rand_state *state, uint32_t from, uint32_t to) {
+static inline uint32_t rand_between(rand_state *restrict state, uint32_t from, uint32_t to) {
     return from + rand_until(state, to - from);
 }
 
diff --git a/perf.cpp b/perf.cpp
new file mode 100644
index 0000000..2d6317f
--- /dev/null
+++ b/perf.cpp
@@ -0,0 +1,85 @@
+#include <chrono>
+#include <cstdio>
+#include <cstdlib>
+#include <ctime>
+#include "fastrand.h"
+
+#define N 9999999   // iterations for the full-range generator comparison
+#define M 99999999  // iterations for the modulo vs. rand_between comparison
+#define FROM 100
+#define TO 576
+
+// Micro-benchmark: rand() vs arc4random() vs lcg(), then lcg()+modulo vs rand_between().
+int main() {
+    // Init
+    srand((unsigned int)time(NULL));
+    rand_state rs = init_rand();
+    uint32_t sum = 0; // to avoid compiler optimizing out stuff
+
+    printf("Full range generation perf - %d number of cases:\n", N);
+
+    auto t0 = std::chrono::high_resolution_clock::now();
+
+    // rand
+    for (int i = 0; i < N; ++i) {
+        sum += rand();
+    }
+
+    auto t1 = std::chrono::high_resolution_clock::now();
+
+    // arc4
+    for (int i = 0; i < N; ++i) {
+        sum += arc4random();
+    }
+
+    auto t2 = std::chrono::high_resolution_clock::now();
+
+    // lcg
+    for (int i = 0; i < N; ++i) {
+        sum += lcg(&rs);
+    }
+
+    auto t3 = std::chrono::high_resolution_clock::now();
+
+    // results 1 (durations are taken in nanoseconds; * 1e-6 converts them to milliseconds)
+
+    auto rand_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
+    auto arc4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
+    auto lcg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t3 - t2);
+
+    printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6);
+    printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6);
+    printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6);
+
+    printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M);
+
+    auto t4 = std::chrono::high_resolution_clock::now();
+
+    // lcg + modulo (runs M times so it matches the case count printed above)
+    for (int i = 0; i < M; ++i) {
+        sum += FROM + (lcg(&rs) % (TO - FROM));
+    }
+
+    auto t5 = std::chrono::high_resolution_clock::now();
+
+    // rand_between (also LCG, but no modulus)
+    for (int i = 0; i < M; ++i) {
+        sum += rand_between(&rs, FROM, TO);
+    }
+
+    auto t6 = std::chrono::high_resolution_clock::now();
+
+    // results 2
+
+    auto mod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t5 - t4);
+    auto between_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t6 - t5);
+
+    printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6);
+    printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6);
+
+    // checksum - avoid optimizing out loops
+
+    printf("Checksum: 0x%x\n", sum);
+
+    return 0;
+}