#include #include #include #include #include #include "fastrand.h" #define N 10000000 // #define N 19999999 // #define M 10000000 // M >= N #define M 19999999 // M >= N /* #define FROM 100 #define TO 576 // [FROM, TO) */ uint32_t res[M] = { 0 }; int main() { assert(M >= N); // M >= N // Init srand((unsigned int)time(NULL)); rand_state rs = init_rand(); rand_ilp_state rs_ilp = init_rand_ilp(); // C++ engines std::linear_congruential_engine lce; std::mt19937 mte; std::minstd_rand lce_def; // Generate FROM,TO as random, because otherwise compiler optimizes out IDIV of the '%' operator! uint32_t FROM = (uint32_t) rand(); uint32_t TO = (uint32_t) rand(); printf("Full range generation perf - %d number of cases:\n", N); auto t0 = std::chrono::high_resolution_clock::now(); // arc4 for (int i = 0; i < N; ++i) { res[i] += arc4random(); } auto t1 = std::chrono::high_resolution_clock::now(); // rand for (int i = 0; i < N; ++i) { res[i] += rand(); } auto t2 = std::chrono::high_resolution_clock::now(); // C++ LCG for (int i = 0; i < N; ++i) { res[i] += lce_def(); } auto t21 = std::chrono::high_resolution_clock::now(); // C++ LCG - my parameters for (int i = 0; i < N; ++i) { res[i] += lce(); } auto t211 = std::chrono::high_resolution_clock::now(); // C++ MT for (int i = 0; i < N; ++i) { res[i] += mte(); } auto t22 = std::chrono::high_resolution_clock::now(); // lcg for (int i = 0; i < N; ++i) { res[i] += lcg(&rs); } auto t3 = std::chrono::high_resolution_clock::now(); // lcg4 #pragma GCC unroll 4 for (int i = 0; i < N; ++i) { // res[i] += lcg_ilp(&rs_ilp, (RAND_ILP)(i % (RAND_ILP_MAX + 1))); res[i] += lcg_ilp(&rs_ilp, (RAND_ILP)(i % 4)); } auto t31 = std::chrono::high_resolution_clock::now(); // results 1 auto arc4_elapsed = std::chrono::duration_cast(t1 - t0); auto rand_elapsed = std::chrono::duration_cast(t2 - t1); auto lce_def_elapsed = std::chrono::duration_cast(t21 - t2); auto lce_elapsed = std::chrono::duration_cast(t211 - t21); auto mt_elapsed = std::chrono::duration_cast(t22 - t21); auto lcg_elapsed = std::chrono::duration_cast(t3 - t22); auto lcg4_elapsed = std::chrono::duration_cast(t31 - t3); printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6); printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6); printf("Time (C++ lcg): %.3f ms.\n", lce_def_elapsed.count() * 1e-6); printf("Time (C++ lcg my parameters): %.3f ms.\n", lce_elapsed.count() * 1e-6); printf("Time (C++ mersenne twister 32bit): %.3f ms.\n", mt_elapsed.count() * 1e-6); printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6); printf("Time (lcg4): %.3f ms.\n", lcg4_elapsed.count() * 1e-6); printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M); auto t4 = std::chrono::high_resolution_clock::now(); // rand + modulo for (int i = 0; i < M; ++i) { res[i] += FROM + (rand() % (TO - FROM)); } auto t5 = std::chrono::high_resolution_clock::now(); // lcg + modulo for (int i = 0; i < M; ++i) { res[i] += FROM + (lcg(&rs) % (TO - FROM)); } auto t6 = std::chrono::high_resolution_clock::now(); // rand_between (also LCG, but no modulus) for (int i = 0; i < M; ++i) { res[i] += rand_between(&rs, FROM, TO); } auto t7 = std::chrono::high_resolution_clock::now(); // results 2 auto randmod_elapsed = std::chrono::duration_cast(t5 - t4); auto mod_elapsed = std::chrono::duration_cast(t6 - t5); auto between_elapsed = std::chrono::duration_cast(t7 - t6); uint32_t choice = rand_between(&rs, FROM, TO); printf("rand + modulo [%u, %u): %.3f ms.\n", FROM, TO, randmod_elapsed.count() * 1e-6); printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6); printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6); // checksum - avoids optimizing out above loops uint32_t sum = 0; for(int i = 0; i < M; ++i) { sum += res[i]; } printf("Checksum: 0x%x\n", sum); return 0; }