diff --git a/fastrand.h b/fastrand.h
index ef99d67..5673e81 100644
--- a/fastrand.h
+++ b/fastrand.h
@@ -112,7 +112,7 @@ static inline uint32_t lcg_ilp(rand_ilp_state *state, RAND_ILP which) {
 		state->d = state->d * 1664525u + 1013904223u;
 		return state->d;
 	} else if(which == E) {
-		state->e = state->a * 1664525u + 1013904223u;
+		state->e = state->e * 1664525u + 1013904223u;
 		return state->e;
 	} else if(which == F) {
 		state->f = state->f * 1664525u + 1013904223u;
@@ -134,6 +134,20 @@ static inline uint32_t rand_until(rand_state *restrict state, uint32_t until) {
 	return (uint32_t)(((uint64_t)rand * until) >> 32);
 }
 
+/**
+ * Scale a 32-bit value into [0, m) without using the modulus operator.
+ *
+ * Takes the high 32 bits of the 64-bit product num * m, the same
+ * multiply-and-shift reduction rand_until() applies to its random draw.
+ *
+ * @param num The value to scale.
+ * @param m Exclusive upper bound of the result; m == 0 always yields 0.
+ * @returns The high 32 bits of num * m, which is always below m for m > 0.
+ */
+static inline uint32_t fastmodlike(uint32_t num, uint32_t m) {
+	return (uint32_t)(((uint64_t)num * m) >> 32);
+}
+
 /**
  * Pick a "reasonably random" number in [from, to) without modulus.
  *
diff --git a/perf.cpp b/perf.cpp
index 9d43242..1df5117 100644
--- a/perf.cpp
+++ b/perf.cpp
@@ -8,8 +8,10 @@
 // #define N 19999999
 // #define M 10000000 // M >= N
 #define M 19999999 // M >= N
+/*
 #define FROM 100
 #define TO 576 // [FROM, TO)
+*/
 
 uint32_t res[M] = { 0 };
 
@@ -21,6 +23,10 @@ int main() {
 	rand_state rs = init_rand();
 	rand_ilp_state rs_ilp = init_rand_ilp();
 
+	// Generate FROM,TO as random, because otherwise compiler optimizes out IDIV of the '%' operator!
+	uint32_t FROM = (uint32_t) rand();
+	uint32_t TO = (uint32_t) rand();
+
 	printf("Full range generation perf - %d number of cases:\n", N);
 
 	auto t0 = std::chrono::high_resolution_clock::now();