fastrand/perf.cpp

156 lines
4.3 KiB
C++
Raw Normal View History

#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <random>

#include "fastrand.h"
// N: iteration count of every "full range generation" loop in main().
#define N 10000000
// #define N 19999999
// M: iteration count of the modulo-vs-rand_between loops in main() and the
// length of res[]; main() checks that M >= N.
// #define M 10000000 // M >= N
#define M 19999999 // M >= N
/*
Fixed bounds, disabled: main() draws FROM and TO at runtime instead, because
with compile-time constants the compiler optimizes out the IDIV of the '%'
operator.
#define FROM 100
#define TO 576 // [FROM, TO)
*/
// Accumulator shared by all benchmark loops in main(): each loop adds its
// generated values into res[i], and main() finally sums the array into a
// checksum. Starts out all zeros.
uint32_t res[M] = {};
int main() {
assert(M >= N); // M >= N
// Init
srand((unsigned int)time(NULL));
rand_state rs = init_rand();
2025-04-01 22:31:53 +02:00
rand_ilp_state rs_ilp = init_rand_ilp();
2025-04-03 15:29:28 +02:00
// C++ engines
std::linear_congruential_engine<uint32_t, 1664525u, 1013904223u, 0> lce;
std::mt19937 mte;
std::minstd_rand lce_def;
2025-04-02 20:10:22 +02:00
// Generate FROM,TO as random, because otherwise compiler optimizes out IDIV of the '%' operator!
uint32_t FROM = (uint32_t) rand();
uint32_t TO = (uint32_t) rand();
printf("Full range generation perf - %d number of cases:\n", N);
auto t0 = std::chrono::high_resolution_clock::now();
// arc4
for (int i = 0; i < N; ++i) {
res[i] += arc4random();
}
auto t1 = std::chrono::high_resolution_clock::now();
// rand
for (int i = 0; i < N; ++i) {
res[i] += rand();
}
auto t2 = std::chrono::high_resolution_clock::now();
2025-04-03 15:29:28 +02:00
// C++ LCG
for (int i = 0; i < N; ++i) {
res[i] += lce_def();
}
auto t21 = std::chrono::high_resolution_clock::now();
// C++ LCG - my parameters
for (int i = 0; i < N; ++i) {
res[i] += lce();
}
auto t211 = std::chrono::high_resolution_clock::now();
// C++ MT
for (int i = 0; i < N; ++i) {
res[i] += mte();
}
auto t22 = std::chrono::high_resolution_clock::now();
// lcg
for (int i = 0; i < N; ++i) {
res[i] += lcg(&rs);
}
auto t3 = std::chrono::high_resolution_clock::now();
2025-04-01 22:31:53 +02:00
// lcg4
#pragma GCC unroll 4
for (int i = 0; i < N; ++i) {
// res[i] += lcg_ilp(&rs_ilp, (RAND_ILP)(i % (RAND_ILP_MAX + 1)));
res[i] += lcg_ilp(&rs_ilp, (RAND_ILP)(i % 4));
}
auto t31 = std::chrono::high_resolution_clock::now();
// results 1
auto arc4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
auto rand_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
2025-04-03 15:29:28 +02:00
auto lce_def_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t21 - t2);
auto lce_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t211 - t21);
auto mt_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t22 - t21);
auto lcg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t3 - t22);
2025-04-01 22:31:53 +02:00
auto lcg4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t31 - t3);
printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6);
printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6);
2025-04-03 15:29:28 +02:00
printf("Time (C++ lcg): %.3f ms.\n", lce_def_elapsed.count() * 1e-6);
printf("Time (C++ lcg my parameters): %.3f ms.\n", lce_elapsed.count() * 1e-6);
printf("Time (C++ mersenne twister 32bit): %.3f ms.\n", mt_elapsed.count() * 1e-6);
printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6);
2025-04-01 22:31:53 +02:00
printf("Time (lcg4): %.3f ms.\n", lcg4_elapsed.count() * 1e-6);
printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M);
auto t4 = std::chrono::high_resolution_clock::now();
// rand + modulo
for (int i = 0; i < M; ++i) {
res[i] += FROM + (rand() % (TO - FROM));
}
auto t5 = std::chrono::high_resolution_clock::now();
// lcg + modulo
for (int i = 0; i < M; ++i) {
res[i] += FROM + (lcg(&rs) % (TO - FROM));
}
auto t6 = std::chrono::high_resolution_clock::now();
// rand_between (also LCG, but no modulus)
for (int i = 0; i < M; ++i) {
res[i] += rand_between(&rs, FROM, TO);
}
auto t7 = std::chrono::high_resolution_clock::now();
// results 2
auto randmod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t5 - t4);
auto mod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t6 - t5);
auto between_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t7 - t6);
uint32_t choice = rand_between(&rs, FROM, TO);
printf("rand + modulo [%u, %u): %.3f ms.\n", FROM, TO, randmod_elapsed.count() * 1e-6);
printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6);
printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6);
// checksum - avoids optimizing out above loops
uint32_t sum = 0;
for(int i = 0; i < M; ++i) {
sum += res[i];
}
printf("Checksum: 0x%x\n", sum);
return 0;
}