restrict keyword added and perf.cpp added

This commit is contained in:
Richard Thier 2025-04-01 20:01:12 +02:00
parent bd6c1e2b18
commit 0a5204c1cc
2 changed files with 97 additions and 2 deletions

View File

@ -8,6 +8,17 @@
#include <stdlib.h> #include <stdlib.h>
#endif /* NO_CSTDLIB */ #endif /* NO_CSTDLIB */
#ifdef __cplusplus
/*
 * C++ has no 'restrict' keyword (it is C99-only), so map it onto the
 * compiler-specific spelling when compiling this header as C++.
 * 'restrict' is purely an optimisation hint: on a compiler with no known
 * equivalent it is defined away to nothing instead of failing the build.
 */
#if defined(__GNUC__) || defined(__clang__)
#define restrict __restrict__ /* GCC/Clang */
#elif defined(_MSC_VER)
#define restrict __restrict /* MSVC */
#else
#define restrict /* unknown compiler: no aliasing hint available */
#endif
#endif
/* Currently a single integer is enough */ /* Currently a single integer is enough */
typedef uint32_t rand_state; typedef uint32_t rand_state;
@ -30,7 +41,7 @@ static inline uint32_t lcg(rand_state *state) {
} }
/** Pick a "reasonably random" number in [0, until-1] without modulus */ /** Pick a "reasonably random" number in [0, until-1] without modulus */
static inline uint32_t rand_until(rand_state *state, uint32_t until) { static inline uint32_t rand_until(rand_state *restrict state, uint32_t until) {
uint32_t rand = lcg(state); uint32_t rand = lcg(state);
// Multiply by "until", take the upper 32 bits of the 64-bit result // Multiply by "until", take the upper 32 bits of the 64-bit result
return (uint32_t)(((uint64_t)rand * until) >> 32); return (uint32_t)(((uint64_t)rand * until) >> 32);
@ -44,7 +55,7 @@ static inline uint32_t rand_until(rand_state *state, uint32_t until) {
* @param to The biggest possible value + 1 * @param to The biggest possible value + 1
* @returns A value in [from, to) interval * @returns A value in [from, to) interval
*/ */
static inline uint32_t rand_between(rand_state *state, uint32_t from, uint32_t to) { static inline uint32_t rand_between(rand_state *restrict state, uint32_t from, uint32_t to) {
return from + rand_until(state, to - from); return from + rand_until(state, to - from);
} }

84
perf.cpp Normal file
View File

@ -0,0 +1,84 @@
#include <cstdio>
#include <cstdlib>
#include <chrono>
#include "fastrand.h"
// Benchmark parameters. Typed constants are used instead of object-like
// macros so the short names cannot textually clobber identifiers elsewhere.

// Number of iterations of each full-range generation loop.
constexpr int N = 9999999;
// Number of cases reported for the modulo-vs-no-modulo comparison.
constexpr int M = 99999999;
// Lower bound (inclusive) of the range used in the ranged comparison.
constexpr int FROM = 100;
// Upper bound (exclusive) of the range used in the ranged comparison.
constexpr int TO = 576;
/**
 * Micro-benchmark driver for the generators declared in fastrand.h.
 *
 * Part 1 times N full-range draws from rand(), arc4random() and lcg().
 * Part 2 times M ranged draws in [FROM, TO): lcg() reduced with a modulo
 * versus rand_between(), which avoids the modulus.
 *
 * Every generated value is added to a checksum that is printed at the end,
 * so the timed loops have an observable result and cannot be optimised away.
 *
 * @returns 0 always
 */
int main() {
    // Init
    srand((unsigned int)time(nullptr));
    rand_state rs = init_rand();
    uint32_t sum = 0; // to avoid compiler optimizing out stuff

    printf("Full range generation perf - %d number of cases:\n", N);

    auto t0 = std::chrono::high_resolution_clock::now();

    // rand
    for (int i = 0; i < N; ++i) {
        sum += static_cast<uint32_t>(rand());
    }

    auto t1 = std::chrono::high_resolution_clock::now();

    // arc4
    for (int i = 0; i < N; ++i) {
        sum += arc4random();
    }

    auto t2 = std::chrono::high_resolution_clock::now();

    // lcg
    for (int i = 0; i < N; ++i) {
        sum += lcg(&rs);
    }

    auto t3 = std::chrono::high_resolution_clock::now();

    // results 1
    auto rand_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
    auto arc4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
    auto lcg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t3 - t2);

    printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6);
    printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6);
    printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6);

    printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M);

    auto t4 = std::chrono::high_resolution_clock::now();

    // lcg + modulo
    // Runs M iterations so the measurement matches the case count printed above.
    for (int i = 0; i < M; ++i) {
        sum += FROM + (lcg(&rs) % (TO - FROM));
    }

    auto t5 = std::chrono::high_resolution_clock::now();

    // rand_between (also LCG, but no modulus)
    for (int i = 0; i < M; ++i) {
        sum += rand_between(&rs, FROM, TO);
    }

    auto t6 = std::chrono::high_resolution_clock::now();

    // results 2
    auto mod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t5 - t4);
    auto between_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t6 - t5);

    // %u expects unsigned arguments, so convert the range bounds explicitly.
    printf("lcg + modulo [%u, %u): %.3f ms.\n",
           static_cast<unsigned>(FROM), static_cast<unsigned>(TO),
           mod_elapsed.count() * 1e-6);
    printf("rand_between [%u, %u): %.3f ms.\n",
           static_cast<unsigned>(FROM), static_cast<unsigned>(TO),
           between_elapsed.count() * 1e-6);

    // checksum - avoid optimizing out loops
    printf("Checksum: 0x%x\n", sum);

    return 0;
}