restrict keyword added and perf.cpp added
This commit is contained in:
parent
bd6c1e2b18
commit
0a5204c1cc
15
fastrand.h
15
fastrand.h
@ -8,6 +8,17 @@
|
||||
#include <stdlib.h>
|
||||
#endif /* NO_CSTDLIB */
|
||||
|
||||
#ifdef __cplusplus
/*
 * C++-specific logic.
 *
 * `restrict` is a C99 keyword that C++ does not have, so when this header is
 * compiled as C++ the name is mapped to the compiler's own spelling.
 * The qualifier is only an aliasing hint for the optimizer: on a compiler
 * whose spelling is unknown it is dropped (defined to nothing) instead of
 * failing the build, so the header stays usable everywhere.
 * The #ifndef guard keeps a definition supplied by the including project.
 */
#ifndef restrict
#if defined(__GNUC__) || defined(__clang__)
#define restrict __restrict__ // GCC/Clang
#elif defined(_MSC_VER)
#define restrict __restrict // MSVC
#else
#define restrict // unknown compiler: no aliasing hint
#endif
#endif /* restrict */
#endif /* __cplusplus */
|
||||
|
||||
/* Currently a single integer is enough */
|
||||
/* PRNG state: a 32-bit unsigned value passed by pointer to lcg(), rand_until() and rand_between(). */
typedef uint32_t rand_state;
|
||||
|
||||
@ -30,7 +41,7 @@ static inline uint32_t lcg(rand_state *state) {
|
||||
}
|
||||
|
||||
/** Pick a "reasonably random" number in [0, until-1] without modulus */
|
||||
static inline uint32_t rand_until(rand_state *state, uint32_t until) {
|
||||
static inline uint32_t rand_until(rand_state *restrict state, uint32_t until) {
|
||||
uint32_t rand = lcg(state);
|
||||
// Multiply by "until", take the upper 32 bits of the 64-bit result
|
||||
return (uint32_t)(((uint64_t)rand * until) >> 32);
|
||||
@ -44,7 +55,7 @@ static inline uint32_t rand_until(rand_state *state, uint32_t until) {
|
||||
* @param to The biggest possible value + 1
|
||||
* @returns A value in [from, to) interval
|
||||
*/
|
||||
static inline uint32_t rand_between(rand_state *state, uint32_t from, uint32_t to) {
|
||||
static inline uint32_t rand_between(rand_state *restrict state, uint32_t from, uint32_t to) {
|
||||
return from + rand_until(state, to - from);
|
||||
}
|
||||
|
||||
|
||||
84
perf.cpp
Normal file
84
perf.cpp
Normal file
@ -0,0 +1,84 @@
|
||||
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include "fastrand.h"
|
||||
|
||||
// Loop bound for the full-range generation benchmark loops in main()
#define N 9999999
|
||||
// Case count reported in the "Modulo VS nomod" benchmark header
#define M 99999999
|
||||
// Lower bound (inclusive) of the interval used in the rand_between comparison
#define FROM 100
|
||||
// Upper bound (exclusive) of the interval used in the rand_between comparison
#define TO 576
|
||||
|
||||
int main() {
|
||||
// Init
|
||||
srand((unsigned int)time(NULL));
|
||||
rand_state rs = init_rand();
|
||||
uint32_t sum = 0; // to avoid compiler optimizing out stuff
|
||||
|
||||
printf("Full range generation perf - %d number of cases:\n", N);
|
||||
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// rand
|
||||
for (int i = 0; i < N; ++i) {
|
||||
sum += rand();
|
||||
}
|
||||
|
||||
auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// arc4
|
||||
for (int i = 0; i < N; ++i) {
|
||||
sum += arc4random();
|
||||
}
|
||||
|
||||
auto t2 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// lcg
|
||||
for (int i = 0; i < N; ++i) {
|
||||
sum += lcg(&rs);
|
||||
}
|
||||
|
||||
auto t3 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// results 1
|
||||
|
||||
auto rand_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
|
||||
auto arc4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
|
||||
auto lcg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t3 - t2);
|
||||
|
||||
printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6);
|
||||
printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6);
|
||||
printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6);
|
||||
|
||||
printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M);
|
||||
|
||||
auto t4 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// lcg + modulo
|
||||
for (int i = 0; i < N; ++i) {
|
||||
sum += FROM + (lcg(&rs) % (TO - FROM));
|
||||
}
|
||||
|
||||
auto t5 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// rand_between (also LCG, but no modulus)
|
||||
for (int i = 0; i < N; ++i) {
|
||||
sum += rand_between(&rs, FROM, TO);
|
||||
}
|
||||
|
||||
auto t6 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// results 2
|
||||
|
||||
auto mod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t5 - t4);
|
||||
auto between_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t6 - t5);
|
||||
|
||||
uint32_t choice = rand_between(&rs, FROM, TO);
|
||||
printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6);
|
||||
printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6);
|
||||
|
||||
// checksum - avoid optimizing out loops
|
||||
|
||||
printf("Checksum: 0x%x\n", sum);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user