From 5fe3db54285ece5f385e819f594a648a9fb5c612 Mon Sep 17 00:00:00 2001 From: Richard Thier Date: Tue, 1 Apr 2025 20:25:53 +0200 Subject: [PATCH] much better perf tests because no summing now that serialized opcodes --- makefile | 2 ++ perf.cpp | 54 ++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/makefile b/makefile index b97102c..fb59045 100644 --- a/makefile +++ b/makefile @@ -4,3 +4,5 @@ release: gcc main.c -O2 -o main perf: g++ perf.cpp -O2 -o perftest; ./perftest +perf-debug: + g++ perf.cpp -g -o perftest; gdb ./perftest diff --git a/perf.cpp b/perf.cpp index eedba51..bd23af0 100644 --- a/perf.cpp +++ b/perf.cpp @@ -5,82 +5,100 @@ #include "fastrand.h" #define N 10000000 -#define M 99999999 // M > N +// #define N 19999999 +// #define M 10000000 // M >= N +#define M 19999999 // M >= N #define FROM 100 #define TO 576 +uint32_t res[M] = { 0 }; + int main() { - assert(M > N); // M > N + assert(M >= N); // M >= N // Init srand((unsigned int)time(NULL)); rand_state rs = init_rand(); - uint32_t sum = 0; // to avoid compiler optimizing out stuff printf("Full range generation perf - %d number of cases:\n", N); auto t0 = std::chrono::high_resolution_clock::now(); - // rand + // arc4 for (int i = 0; i < N; ++i) { - sum += rand(); + res[i] += arc4random(); } auto t1 = std::chrono::high_resolution_clock::now(); - // arc4 + // rand for (int i = 0; i < N; ++i) { - sum += arc4random(); + res[i] += rand(); } auto t2 = std::chrono::high_resolution_clock::now(); // lcg for (int i = 0; i < N; ++i) { - sum += lcg(&rs); + res[i] += lcg(&rs); } auto t3 = std::chrono::high_resolution_clock::now(); // results 1 - auto rand_elapsed = std::chrono::duration_cast(t1 - t0); - auto arc4_elapsed = std::chrono::duration_cast(t2 - t1); + auto arc4_elapsed = std::chrono::duration_cast(t1 - t0); + auto rand_elapsed = std::chrono::duration_cast(t2 - t1); auto lcg_elapsed = std::chrono::duration_cast(t3 - t2); - printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6); printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6); + printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6); printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6); printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M); + auto t4 = std::chrono::high_resolution_clock::now(); - // lcg + modulo + // rand + modulo for (int i = 0; i < M; ++i) { - sum += FROM + (lcg(&rs) % (TO - FROM)); + res[i] += FROM + (rand() % (TO - FROM)); } auto t5 = std::chrono::high_resolution_clock::now(); - // rand_between (also LCG, but no modulus) + // lcg + modulo for (int i = 0; i < M; ++i) { - sum += rand_between(&rs, FROM, TO); + res[i] += FROM + (lcg(&rs) % (TO - FROM)); } auto t6 = std::chrono::high_resolution_clock::now(); + // rand_between (also LCG, but no modulus) + for (int i = 0; i < M; ++i) { + res[i] += rand_between(&rs, FROM, TO); + } + + auto t7 = std::chrono::high_resolution_clock::now(); + + // results 2 - auto mod_elapsed = std::chrono::duration_cast(t5 - t4); - auto between_elapsed = std::chrono::duration_cast(t6 - t5); + auto randmod_elapsed = std::chrono::duration_cast(t5 - t4); + auto mod_elapsed = std::chrono::duration_cast(t6 - t5); + auto between_elapsed = std::chrono::duration_cast(t7 - t6); uint32_t choice = rand_between(&rs, FROM, TO); + printf("rand + modulo [%u, %u): %.3f ms.\n", FROM, TO, randmod_elapsed.count() * 1e-6); printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6); printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6); - // checksum - avoid optimizing out loops + // checksum - avoids optimizing out above loops + uint32_t sum = 0; + for(int i = 0; i < M; ++i) { + sum += res[i]; + } printf("Checksum: 0x%x\n", sum); return 0;