Much better perf tests, because there is no summing in the timed loops now that opcodes are serialized
This commit is contained in:
parent
b1d62443f6
commit
5fe3db5428
2
makefile
2
makefile
@ -4,3 +4,5 @@ release:
|
|||||||
gcc main.c -O2 -o main
|
gcc main.c -O2 -o main
|
||||||
perf:
|
perf:
|
||||||
g++ perf.cpp -O2 -o perftest; ./perftest
|
g++ perf.cpp -O2 -o perftest; ./perftest
|
||||||
|
perf-debug:
|
||||||
|
g++ perf.cpp -g -o perftest; gdb ./perftest
|
||||||
|
|||||||
54
perf.cpp
54
perf.cpp
@ -5,82 +5,100 @@
|
|||||||
#include "fastrand.h"
|
#include "fastrand.h"
|
||||||
|
|
||||||
#define N 10000000
|
#define N 10000000
|
||||||
#define M 99999999 // M > N
|
// #define N 19999999
|
||||||
|
// #define M 10000000 // M >= N
|
||||||
|
#define M 19999999 // M >= N
|
||||||
#define FROM 100
|
#define FROM 100
|
||||||
#define TO 576
|
#define TO 576
|
||||||
|
|
||||||
|
uint32_t res[M] = { 0 };
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
assert(M > N); // M > N
|
assert(M >= N); // M >= N
|
||||||
|
|
||||||
// Init
|
// Init
|
||||||
srand((unsigned int)time(NULL));
|
srand((unsigned int)time(NULL));
|
||||||
rand_state rs = init_rand();
|
rand_state rs = init_rand();
|
||||||
uint32_t sum = 0; // to avoid compiler optimizing out stuff
|
|
||||||
|
|
||||||
printf("Full range generation perf - %d number of cases:\n", N);
|
printf("Full range generation perf - %d number of cases:\n", N);
|
||||||
|
|
||||||
auto t0 = std::chrono::high_resolution_clock::now();
|
auto t0 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// rand
|
// arc4
|
||||||
for (int i = 0; i < N; ++i) {
|
for (int i = 0; i < N; ++i) {
|
||||||
sum += rand();
|
res[i] += arc4random();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto t1 = std::chrono::high_resolution_clock::now();
|
auto t1 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// arc4
|
// rand
|
||||||
for (int i = 0; i < N; ++i) {
|
for (int i = 0; i < N; ++i) {
|
||||||
sum += arc4random();
|
res[i] += rand();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto t2 = std::chrono::high_resolution_clock::now();
|
auto t2 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// lcg
|
// lcg
|
||||||
for (int i = 0; i < N; ++i) {
|
for (int i = 0; i < N; ++i) {
|
||||||
sum += lcg(&rs);
|
res[i] += lcg(&rs);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto t3 = std::chrono::high_resolution_clock::now();
|
auto t3 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// results 1
|
// results 1
|
||||||
|
|
||||||
auto rand_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
|
auto arc4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0);
|
||||||
auto arc4_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
|
auto rand_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
|
||||||
auto lcg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t3 - t2);
|
auto lcg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t3 - t2);
|
||||||
|
|
||||||
printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6);
|
|
||||||
printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6);
|
printf("Time (arc4): %.3f ms.\n", arc4_elapsed.count() * 1e-6);
|
||||||
|
printf("Time (rand): %.3f ms.\n", rand_elapsed.count() * 1e-6);
|
||||||
printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6);
|
printf("Time (lcg): %.3f ms.\n", lcg_elapsed.count() * 1e-6);
|
||||||
|
|
||||||
printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M);
|
printf("Modulo VS nomod perf for rand_between (both LCG) - %d number of cases:\n", M);
|
||||||
|
|
||||||
|
|
||||||
auto t4 = std::chrono::high_resolution_clock::now();
|
auto t4 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// lcg + modulo
|
// rand + modulo
|
||||||
for (int i = 0; i < M; ++i) {
|
for (int i = 0; i < M; ++i) {
|
||||||
sum += FROM + (lcg(&rs) % (TO - FROM));
|
res[i] += FROM + (rand() % (TO - FROM));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto t5 = std::chrono::high_resolution_clock::now();
|
auto t5 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// rand_between (also LCG, but no modulus)
|
// lcg + modulo
|
||||||
for (int i = 0; i < M; ++i) {
|
for (int i = 0; i < M; ++i) {
|
||||||
sum += rand_between(&rs, FROM, TO);
|
res[i] += FROM + (lcg(&rs) % (TO - FROM));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto t6 = std::chrono::high_resolution_clock::now();
|
auto t6 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
// rand_between (also LCG, but no modulus)
|
||||||
|
for (int i = 0; i < M; ++i) {
|
||||||
|
res[i] += rand_between(&rs, FROM, TO);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto t7 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
|
||||||
// results 2
|
// results 2
|
||||||
|
|
||||||
auto mod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t5 - t4);
|
auto randmod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t5 - t4);
|
||||||
auto between_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t6 - t5);
|
auto mod_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t6 - t5);
|
||||||
|
auto between_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(t7 - t6);
|
||||||
|
|
||||||
uint32_t choice = rand_between(&rs, FROM, TO);
|
uint32_t choice = rand_between(&rs, FROM, TO);
|
||||||
|
printf("rand + modulo [%u, %u): %.3f ms.\n", FROM, TO, randmod_elapsed.count() * 1e-6);
|
||||||
printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6);
|
printf("lcg + modulo [%u, %u): %.3f ms.\n", FROM, TO, mod_elapsed.count() * 1e-6);
|
||||||
printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6);
|
printf("rand_between [%u, %u): %.3f ms.\n", FROM, TO, between_elapsed.count() * 1e-6);
|
||||||
|
|
||||||
// checksum - avoid optimizing out loops
|
// checksum - avoids optimizing out above loops
|
||||||
|
|
||||||
|
uint32_t sum = 0;
|
||||||
|
for(int i = 0; i < M; ++i) {
|
||||||
|
sum += res[i];
|
||||||
|
}
|
||||||
printf("Checksum: 0x%x\n", sum);
|
printf("Checksum: 0x%x\n", sum);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user