From bd1ae74bcfab359d3ec0d126f3dd23199eb646ba Mon Sep 17 00:00:00 2001 From: Ben Haller Date: Sat, 1 Nov 2025 14:11:00 -0400 Subject: [PATCH] shift from taus2 and MT19937-64 to PCG RNGs --- CMakeLists.txt | 2 +- SLiM.xcodeproj/project.pbxproj | 4 + .../xcshareddata/xcschemes/SLiM.xcscheme | 8 +- VERSIONS | 1 + core/chromosome.cpp | 55 +- core/chromosome.h | 53 +- core/genomic_element_type.cpp | 4 +- core/haplosome.cpp | 4 +- core/interaction_type.cpp | 18 +- core/mutation_type.cpp | 20 +- core/population.cpp | 143 +- core/slim_functions.cpp | 10 +- core/slim_test_other.cpp | 46 +- core/spatial_kernel.cpp | 86 +- core/spatial_map.cpp | 16 +- core/species.cpp | 4 +- core/subpopulation.cpp | 169 +- core/subpopulation.h | 42 +- eidos/eidos_functions_distributions.cpp | 120 +- eidos/eidos_functions_values.cpp | 69 +- eidos/eidos_rng.cpp | 222 +- eidos/eidos_rng.h | 451 ++-- eidos/eidos_test.cpp | 2 +- eidos/eidos_test_functions_statistics.cpp | 146 +- eidos/eidos_test_functions_vector.cpp | 35 +- eidos/pcg_extras.hpp | 666 ++++++ eidos/pcg_random.hpp | 1951 +++++++++++++++++ 27 files changed, 3457 insertions(+), 890 deletions(-) create mode 100644 eidos/pcg_extras.hpp create mode 100644 eidos/pcg_random.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f7a150c69..9e94f8dbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,7 +90,7 @@ if(CPPCHECK) find_program(CPPCHECK_EXECUTABLE cppcheck) if (CPPCHECK_EXECUTABLE) message(STATUS "CPPCHECK is ${CPPCHECK}; building with cppcheck (for development)") - set(CPPCHECK_COMMAND "${CPPCHECK_EXECUTABLE}" "--enable=all;--suppress=missingIncludeSystem;--suppress=syntaxError;--suppress=unmatchedSuppression;--inline-suppr;--std=c++11;--quiet;--suppress=*:robin_hood.h;--suppress=*:lodepng.cpp;--suppress=checkersReport;--suppress=*:eidos_openmp.h;--suppress=unusedFunction") + set(CPPCHECK_COMMAND "${CPPCHECK_EXECUTABLE}" 
"--enable=all;--suppress=missingIncludeSystem;--suppress=syntaxError;--suppress=unmatchedSuppression;--inline-suppr;--std=c++11;--quiet;--suppress=*:robin_hood.h;--suppress=*:lodepng.cpp;--suppress=*:pcg_extras.hpp;--suppress=*:pcg_random.hpp;--suppress=checkersReport;--suppress=*:eidos_openmp.h;--suppress=unusedFunction") message(STATUS "+++ cppcheck is at ${CPPCHECK_EXECUTABLE}") message(STATUS "+++ CPPCHECK_COMMAND is ${CPPCHECK_COMMAND}") else() diff --git a/SLiM.xcodeproj/project.pbxproj b/SLiM.xcodeproj/project.pbxproj index 3522d9e06..5c057561a 100644 --- a/SLiM.xcodeproj/project.pbxproj +++ b/SLiM.xcodeproj/project.pbxproj @@ -2189,6 +2189,8 @@ 98D7D6642AB24CBC002AFE34 /* chisq.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chisq.c; sourceTree = ""; }; 98D7EBEE28CE557C00DEAAC4 /* eidos_multi */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = eidos_multi; sourceTree = BUILT_PRODUCTS_DIR; }; 98D7ED2D28CE58FC00DEAAC4 /* slim_multi */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = slim_multi; sourceTree = BUILT_PRODUCTS_DIR; }; + 98D957882EB53494008314C1 /* pcg_extras.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = pcg_extras.hpp; sourceTree = ""; }; + 98D957892EB53494008314C1 /* pcg_random.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = pcg_random.hpp; sourceTree = ""; }; 98DB3D6D1E6122AE00E2C200 /* interaction_type.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = interaction_type.cpp; sourceTree = ""; }; 98DB3D6E1E6122AE00E2C200 /* interaction_type.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = interaction_type.h; sourceTree = ""; }; 98DC9838289986B300160DD8 /* GitSHA1.cpp.in */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; 
path = GitSHA1.cpp.in; sourceTree = ""; }; @@ -2389,6 +2391,8 @@ 98235687252FDD120096A745 /* lodepng.h */, 98235681252FDCF50096A745 /* lodepng.cpp */, 98186DB8254A8B1600F9118C /* robin_hood.h */, + 98D957882EB53494008314C1 /* pcg_extras.hpp */, + 98D957892EB53494008314C1 /* pcg_random.hpp */, ); name = dependencies; sourceTree = ""; diff --git a/SLiM.xcodeproj/xcshareddata/xcschemes/SLiM.xcscheme b/SLiM.xcodeproj/xcshareddata/xcschemes/SLiM.xcscheme index 1d2c371b5..06d202a6e 100644 --- a/SLiM.xcodeproj/xcshareddata/xcschemes/SLiM.xcscheme +++ b/SLiM.xcodeproj/xcshareddata/xcschemes/SLiM.xcscheme @@ -40,7 +40,7 @@ + isEnabled = "NO"> + isEnabled = "NO"> + isEnabled = "YES"> (gsl_ran_discrete(rng, lookup)); + int mut_subrange_index = static_cast(gsl_ran_discrete(rng_gsl, lookup)); const GESubrange &subrange = (*subranges)[mut_subrange_index]; GenomicElement *source_element = subrange.genomic_element_ptr_; // Draw the position along the chromosome for the mutation, within the genomic element - slim_position_t position = subrange.start_position_ + static_cast(Eidos_rng_uniform_int_MT64(mt, subrange.end_position_ - subrange.start_position_ + 1)); - // old 32-bit position not MT64 code: - //slim_position_t position = subrange.start_position_ + static_cast(Eidos_rng_uniform_int(rng, (uint32_t)(subrange.end_position_ - subrange.start_position_ + 1))); + slim_position_t position = subrange.start_position_ + static_cast(Eidos_rng_interval_uint64(rng_64, subrange.end_position_ - subrange.start_position_ + 1)); p_positions.emplace_back(position, source_element); } @@ -1310,8 +1308,8 @@ MutationIndex Chromosome::DrawNewMutationExtended(std::pair(gsl_ran_discrete(rng, lookup)); + int recombination_interval = static_cast(gsl_ran_discrete(rng_gsl, lookup)); // choose a breakpoint anywhere in the chosen recombination interval with equal probability @@ -1508,7 +1506,7 @@ void Chromosome::_DrawCrossoverBreakpoints(IndividualSex p_parent_sex, const int // positions to the left of its 
enclosed bases, up to and including the position to the left of the final base given as the // end position of the interval. The next interval's first owned recombination position is therefore to the left of the // base that is one position to the right of the end of the preceding interval. So we have to add one to the position - // given by recombination_end_positions_[recombination_interval - 1], at minimum. Since Eidos_rng_uniform_int() returns + // given by recombination_end_positions_[recombination_interval - 1], at minimum. Since Eidos_rng_interval_uint64() returns // a zero-based random number, that means we need a +1 here as well. // // The key fact here is that a recombination breakpoint position of 1 means "break to the left of the base at position 1" – @@ -1517,7 +1515,7 @@ void Chromosome::_DrawCrossoverBreakpoints(IndividualSex p_parent_sex, const int // breakpoint. When their position is *equal*, the breakpoint gets serviced by switching strands. That logic causes the // breakpoints to fall to the left of their designated base. // - // Note that Eidos_rng_uniform_int() crashes (well, aborts fatally) if passed 0 for n. We need to guarantee that that doesn't + // Note that Eidos_rng_interval_uint64() crashes (well, aborts fatally) if passed 0 for n. We need to guarantee that that doesn't // happen, and we don't want to waste time checking for that condition here. For a 1-base model, we are guaranteed that // the overall recombination rate will be zero, by the logic in InitializeDraws(), and so we should not be called in the // first place. For longer chromosomes that start with a 1-base recombination interval, the rate calculated by @@ -1526,9 +1524,9 @@ void Chromosome::_DrawCrossoverBreakpoints(IndividualSex p_parent_sex, const int // since we guarantee that recombination end positions are in strictly ascending order. So we should never crash. 
:-> if (recombination_interval == 0) - breakpoint = static_cast(Eidos_rng_uniform_int_MT64(mt, (*end_positions)[recombination_interval]) + 1); + breakpoint = static_cast(Eidos_rng_interval_uint64(rng_64, (*end_positions)[recombination_interval]) + 1); else - breakpoint = (*end_positions)[recombination_interval - 1] + 1 + static_cast(Eidos_rng_uniform_int_MT64(mt, (*end_positions)[recombination_interval] - (*end_positions)[recombination_interval - 1])); + breakpoint = (*end_positions)[recombination_interval - 1] + 1 + static_cast(Eidos_rng_interval_uint64(rng_64, (*end_positions)[recombination_interval] - (*end_positions)[recombination_interval - 1])); p_crossovers.emplace_back(breakpoint); } @@ -1604,7 +1602,9 @@ void Chromosome::_DrawDSBBreakpoints(IndividualSex p_parent_sex, const int p_num // to a collision, because such redrawing would be liable to produce bias towards shorter extents. (Redrawing the crossover/ // noncrossover and simple/complex decisions would probably be harmless, but it is simpler to just make all decisions up front.) 
int try_count = 0; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + static std::vector> dsb_infos; // using a static prevents reallocation // If the redrawLengthsOnFailure parameter to initializeGeneConversion() is T, we jump back here on layout failure @@ -1618,8 +1618,8 @@ void Chromosome::_DrawDSBBreakpoints(IndividualSex p_parent_sex, const int p_num for (int i = 0; i < p_num_breakpoints; i++) { // If the gene conversion tract mean length is < 2.0, gsl_ran_geometric() will blow up, and we should treat the tract length as zero - bool noncrossover = (Eidos_rng_uniform(rng) <= non_crossover_fraction_); // tuple position 2 - bool simple = (Eidos_rng_uniform(rng) <= simple_conversion_fraction_); // tuple position 3 + bool noncrossover = (Eidos_rng_uniform_doubleCO(rng_64) <= non_crossover_fraction_); // tuple position 2 + bool simple = (Eidos_rng_uniform_doubleCO(rng_64) <= simple_conversion_fraction_); // tuple position 3 dsb_infos.emplace_back(0, 0, noncrossover, simple); } @@ -1628,10 +1628,10 @@ void Chromosome::_DrawDSBBreakpoints(IndividualSex p_parent_sex, const int p_num { for (int i = 0; i < p_num_breakpoints; i++) { - slim_position_t extent1 = gsl_ran_geometric(rng, gene_conversion_inv_half_length_); // tuple position 0 - slim_position_t extent2 = gsl_ran_geometric(rng, gene_conversion_inv_half_length_); // tuple position 1 - bool noncrossover = (Eidos_rng_uniform(rng) <= non_crossover_fraction_); // tuple position 2 - bool simple = (Eidos_rng_uniform(rng) <= simple_conversion_fraction_); // tuple position 3 + slim_position_t extent1 = gsl_ran_geometric(rng_gsl, gene_conversion_inv_half_length_); // tuple position 0 + slim_position_t extent2 = gsl_ran_geometric(rng_gsl, gene_conversion_inv_half_length_); // tuple position 1 + bool noncrossover = (Eidos_rng_uniform_doubleCO(rng_64) <= non_crossover_fraction_); // tuple position 
2 + bool simple = (Eidos_rng_uniform_doubleCO(rng_64) <= simple_conversion_fraction_); // tuple position 3 dsb_infos.emplace_back(extent1, extent2, noncrossover, simple); } @@ -1650,19 +1650,18 @@ void Chromosome::_DrawDSBBreakpoints(IndividualSex p_parent_sex, const int p_num } // First draw DSB points; dsb_points contains positions and a flag for whether the breakpoint is at a rate=0.5 position - Eidos_MT_State *mt = EIDOS_MT_RNG(omp_get_thread_num()); static std::vector> dsb_points; // using a static prevents reallocation dsb_points.resize(0); for (int i = 0; i < p_num_breakpoints; i++) { slim_position_t breakpoint = 0; - int recombination_interval = static_cast(gsl_ran_discrete(rng, lookup)); + int recombination_interval = static_cast(gsl_ran_discrete(rng_gsl, lookup)); if (recombination_interval == 0) - breakpoint = static_cast(Eidos_rng_uniform_int_MT64(mt, (*end_positions)[recombination_interval]) + 1); + breakpoint = static_cast(Eidos_rng_interval_uint64(rng_64, (*end_positions)[recombination_interval]) + 1); else - breakpoint = (*end_positions)[recombination_interval - 1] + 1 + static_cast(Eidos_rng_uniform_int_MT64(mt, (*end_positions)[recombination_interval] - (*end_positions)[recombination_interval - 1])); + breakpoint = (*end_positions)[recombination_interval - 1] + 1 + static_cast(Eidos_rng_interval_uint64(rng_64, (*end_positions)[recombination_interval] - (*end_positions)[recombination_interval - 1])); if ((*rates)[recombination_interval] == 0.5) dsb_points.emplace_back(breakpoint, true); diff --git a/core/chromosome.h b/core/chromosome.h index 575093db9..d4dedcfa0 100644 --- a/core/chromosome.h +++ b/core/chromosome.h @@ -498,9 +498,9 @@ class Chromosome : public EidosDictionaryRetained // draw the number of mutations that occur, based on the overall mutation rate inline __attribute__((always_inline)) int Chromosome::DrawMutationCount(IndividualSex p_sex) const { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - #ifdef USE_GSL_POISSON + 
gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + if (single_mutation_map_) { // With a single map, we don't care what sex we are passed; same map for all, and sex may be enabled or disabled @@ -523,21 +523,23 @@ inline __attribute__((always_inline)) int Chromosome::DrawMutationCount(Individu } } #else + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + if (single_mutation_map_) { // With a single map, we don't care what sex we are passed; same map for all, and sex may be enabled or disabled - return Eidos_FastRandomPoisson(rng, overall_mutation_rate_H_, exp_neg_overall_mutation_rate_H_); + return Eidos_FastRandomPoisson(rng_state, overall_mutation_rate_H_, exp_neg_overall_mutation_rate_H_); } else { // With sex-specific maps, we treat males and females separately, and the individual we're given better be one of the two if (p_sex == IndividualSex::kMale) { - return Eidos_FastRandomPoisson(rng, overall_mutation_rate_M_, exp_neg_overall_mutation_rate_M_); + return Eidos_FastRandomPoisson(rng_state, overall_mutation_rate_M_, exp_neg_overall_mutation_rate_M_); } else if (p_sex == IndividualSex::kFemale) { - return Eidos_FastRandomPoisson(rng, overall_mutation_rate_F_, exp_neg_overall_mutation_rate_F_); + return Eidos_FastRandomPoisson(rng_state, overall_mutation_rate_F_, exp_neg_overall_mutation_rate_F_); } else { @@ -550,9 +552,9 @@ inline __attribute__((always_inline)) int Chromosome::DrawMutationCount(Individu // draw the number of breakpoints that occur, based on the overall recombination rate inline __attribute__((always_inline)) int Chromosome::DrawBreakpointCount(IndividualSex p_sex) const { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - #ifdef USE_GSL_POISSON + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + if (single_recombination_map_) { // With a single map, we don't care what sex we are passed; same map for all, and sex may be enabled or disabled @@ -575,21 +577,23 @@ inline __attribute__((always_inline)) int 
Chromosome::DrawBreakpointCount(Indivi } } #else + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + if (single_recombination_map_) { // With a single map, we don't care what sex we are passed; same map for all, and sex may be enabled or disabled - return Eidos_FastRandomPoisson(rng, overall_recombination_rate_H_, exp_neg_overall_recombination_rate_H_); + return Eidos_FastRandomPoisson(rng_state, overall_recombination_rate_H_, exp_neg_overall_recombination_rate_H_); } else { // With sex-specific maps, we treat males and females separately, and the individual we're given better be one of the two if (p_sex == IndividualSex::kMale) { - return Eidos_FastRandomPoisson(rng, overall_recombination_rate_M_, exp_neg_overall_recombination_rate_M_); + return Eidos_FastRandomPoisson(rng_state, overall_recombination_rate_M_, exp_neg_overall_recombination_rate_M_); } else if (p_sex == IndividualSex::kFemale) { - return Eidos_FastRandomPoisson(rng, overall_recombination_rate_F_, exp_neg_overall_recombination_rate_F_); + return Eidos_FastRandomPoisson(rng_state, overall_recombination_rate_F_, exp_neg_overall_recombination_rate_F_); } else { @@ -604,8 +608,9 @@ inline __attribute__((always_inline)) int Chromosome::DrawBreakpointCount(Indivi // this method relies on Eidos_FastRandomPoisson_NONZERO() and cannot be called when USE_GSL_POISSON is defined inline __attribute__((always_inline)) void Chromosome::DrawMutationAndBreakpointCounts(IndividualSex p_sex, int *p_mut_count, int *p_break_count) const { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - double u = Eidos_rng_uniform(rng); + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = rng_state->pcg64_rng_; + double u = Eidos_rng_uniform_doubleCO(rng_64); if (single_recombination_map_ && single_mutation_map_) { @@ -619,17 +624,17 @@ inline __attribute__((always_inline)) void Chromosome::DrawMutationAndBreakpoint else if (u <= 
probability_both_0_OR_mut_0_break_non0_H_) { *p_mut_count = 0; - *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_recombination_rate_H_, exp_neg_overall_recombination_rate_H_); + *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_recombination_rate_H_, exp_neg_overall_recombination_rate_H_); } else if (u <= probability_both_0_OR_mut_0_break_non0_OR_mut_non0_break_0_H_) { - *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_mutation_rate_H_, exp_neg_overall_mutation_rate_H_); + *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_mutation_rate_H_, exp_neg_overall_mutation_rate_H_); *p_break_count = 0; } else { - *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_mutation_rate_H_, exp_neg_overall_mutation_rate_H_); - *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_recombination_rate_H_, exp_neg_overall_recombination_rate_H_); + *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_mutation_rate_H_, exp_neg_overall_mutation_rate_H_); + *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_recombination_rate_H_, exp_neg_overall_recombination_rate_H_); } } else @@ -648,17 +653,17 @@ inline __attribute__((always_inline)) void Chromosome::DrawMutationAndBreakpoint else if (u <= probability_both_0_OR_mut_0_break_non0_M_) { *p_mut_count = 0; - *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_recombination_rate_M_, exp_neg_overall_recombination_rate_M_); + *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_recombination_rate_M_, exp_neg_overall_recombination_rate_M_); } else if (u <= probability_both_0_OR_mut_0_break_non0_OR_mut_non0_break_0_M_) { - *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_mutation_rate_M_, exp_neg_overall_mutation_rate_M_); + *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_mutation_rate_M_, exp_neg_overall_mutation_rate_M_); *p_break_count = 0; } else { - *p_mut_count = 
Eidos_FastRandomPoisson_NONZERO(rng, overall_mutation_rate_M_, exp_neg_overall_mutation_rate_M_); - *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_recombination_rate_M_, exp_neg_overall_recombination_rate_M_); + *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_mutation_rate_M_, exp_neg_overall_mutation_rate_M_); + *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_recombination_rate_M_, exp_neg_overall_recombination_rate_M_); } } else if (p_sex == IndividualSex::kFemale) @@ -671,17 +676,17 @@ inline __attribute__((always_inline)) void Chromosome::DrawMutationAndBreakpoint else if (u <= probability_both_0_OR_mut_0_break_non0_F_) { *p_mut_count = 0; - *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_recombination_rate_F_, exp_neg_overall_recombination_rate_F_); + *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_recombination_rate_F_, exp_neg_overall_recombination_rate_F_); } else if (u <= probability_both_0_OR_mut_0_break_non0_OR_mut_non0_break_0_F_) { - *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_mutation_rate_F_, exp_neg_overall_mutation_rate_F_); + *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_mutation_rate_F_, exp_neg_overall_mutation_rate_F_); *p_break_count = 0; } else { - *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_mutation_rate_F_, exp_neg_overall_mutation_rate_F_); - *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng, overall_recombination_rate_F_, exp_neg_overall_recombination_rate_F_); + *p_mut_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_mutation_rate_F_, exp_neg_overall_mutation_rate_F_); + *p_break_count = Eidos_FastRandomPoisson_NONZERO(rng_state, overall_recombination_rate_F_, exp_neg_overall_recombination_rate_F_); } } else diff --git a/core/genomic_element_type.cpp b/core/genomic_element_type.cpp index 921dbf7bd..12e90a7e4 100644 --- a/core/genomic_element_type.cpp +++ b/core/genomic_element_type.cpp @@ 
-105,8 +105,8 @@ MutationType *GenomicElementType::DrawMutationType(void) const if (!lookup_mutation_type_) EIDOS_TERMINATION << "ERROR (GenomicElementType::DrawMutationType): empty mutation type vector for genomic element type." << EidosTerminate(); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - return mutation_type_ptrs_[gsl_ran_discrete(rng, lookup_mutation_type_)]; + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + return mutation_type_ptrs_[gsl_ran_discrete(rng_gsl, lookup_mutation_type_)]; } void GenomicElementType::SetNucleotideMutationMatrix(const EidosValue_Float_SP &p_mutation_matrix) diff --git a/core/haplosome.cpp b/core/haplosome.cpp index bf4207be1..c910034b8 100644 --- a/core/haplosome.cpp +++ b/core/haplosome.cpp @@ -3375,7 +3375,7 @@ EidosValue_SP Haplosome_Class::ExecuteMethod_readHaplosomesFromMS(EidosGlobalStr // Instantiate the mutations; NOTE THAT THE STACKING POLICY IS NOT CHECKED HERE, AS THIS IS NOT CONSIDERED THE ADDITION OF A MUTATION! std::vector mutation_indices; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); for (int mut_index = 0; mut_index < segsites; ++mut_index) { @@ -3390,7 +3390,7 @@ EidosValue_SP Haplosome_Class::ExecuteMethod_readHaplosomesFromMS(EidosGlobalStr // select a nucleotide that is different from the ancestral state at this position int8_t ancestral = (int8_t)chromosome->AncestralSequence()->NucleotideAtIndex(position); - nucleotide = (int8_t)Eidos_rng_uniform_int(rng, 3); // 0, 1, 2 + nucleotide = (int8_t)Eidos_rng_interval_uint32(rng_32, 3); // 0, 1, 2 if (nucleotide == ancestral) nucleotide++; diff --git a/core/interaction_type.cpp b/core/interaction_type.cpp index c986e34cd..2fff56e66 100755 --- a/core/interaction_type.cpp +++ b/core/interaction_type.cpp @@ -4151,18 +4151,18 @@ static void DrawByWeights(int draw_count, const double *weights, int n_weights, // than the GSL; and for large counts the GSL is surely a win. 
Trying to figure out exactly where // the crossover is in all cases would be overkill; my testing indicates the performance difference // between the two methods is not really that large anyway. - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - if (weight_total > 0.0) { if (draw_count > 50) // the empirically determined crossover point in performance { // Use gsl_ran_discrete() to do the drawing + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_ran_discrete_t *gsl_lookup = gsl_ran_discrete_preproc(n_weights, weights); for (int64_t draw_index = 0; draw_index < draw_count; ++draw_index) { - int hit_index = (int)gsl_ran_discrete(rng, gsl_lookup); + int hit_index = (int)gsl_ran_discrete(rng_gsl, gsl_lookup); draw_indices.emplace_back(hit_index); } @@ -4172,9 +4172,11 @@ static void DrawByWeights(int draw_count, const double *weights, int n_weights, else { // Use linear search to do the drawing + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + for (int64_t draw_index = 0; draw_index < draw_count; ++draw_index) { - double the_rose_in_the_teeth = Eidos_rng_uniform(rng) * weight_total; + double the_rose_in_the_teeth = Eidos_rng_uniform_doubleCO(rng_64) * weight_total; double cumulative_weight = 0.0; int hit_index; @@ -4358,13 +4360,13 @@ EidosValue_SP InteractionType::ExecuteMethod_drawByStrength(EidosGlobalStringID if (nnz > 0) { std::vector &exerters = exerter_subpop->parent_individuals_; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); result_vec->resize_no_initialize(count); for (int64_t result_index = 0; result_index < count; ++result_index) { - int presence_index = Eidos_rng_uniform_int(rng, nnz); // equal probability for each exerter + int presence_index = Eidos_rng_interval_uint32(rng_32, nnz); // equal probability for each exerter uint32_t exerter_index = columns[presence_index]; Individual *chosen_individual = exerters[exerter_index]; @@ -4521,13 
+4523,13 @@ EidosValue_SP InteractionType::ExecuteMethod_drawByStrength(EidosGlobalStringID if (nnz > 0) { std::vector &exerters = exerter_subpop->parent_individuals_; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); result_vec->resize_no_initialize(count); for (int64_t result_index = 0; result_index < count; ++result_index) { - int presence_index = Eidos_rng_uniform_int(rng, nnz); // equal probability for each exerter + int presence_index = Eidos_rng_interval_uint32(rng_32, nnz); // equal probability for each exerter uint32_t exerter_index = columns[presence_index]; Individual *chosen_individual = exerters[exerter_index]; diff --git a/core/mutation_type.cpp b/core/mutation_type.cpp index fccef3388..3537fec38 100644 --- a/core/mutation_type.cpp +++ b/core/mutation_type.cpp @@ -233,32 +233,32 @@ double MutationType::DrawSelectionCoefficient(void) const case DFEType::kGamma: { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - return gsl_ran_gamma(rng, dfe_parameters_[1], dfe_parameters_[0] / dfe_parameters_[1]); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + return gsl_ran_gamma(rng_gsl, dfe_parameters_[1], dfe_parameters_[0] / dfe_parameters_[1]); } case DFEType::kExponential: { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - return gsl_ran_exponential(rng, dfe_parameters_[0]); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + return gsl_ran_exponential(rng_gsl, dfe_parameters_[0]); } case DFEType::kNormal: { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - return gsl_ran_gaussian(rng, dfe_parameters_[1]) + dfe_parameters_[0]; + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + return gsl_ran_gaussian(rng_gsl, dfe_parameters_[1]) + dfe_parameters_[0]; } case DFEType::kWeibull: { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - return gsl_ran_weibull(rng, dfe_parameters_[0], dfe_parameters_[1]); + gsl_rng *rng_gsl = 
EIDOS_GSL_RNG(omp_get_thread_num()); + return gsl_ran_weibull(rng_gsl, dfe_parameters_[0], dfe_parameters_[1]); } case DFEType::kLaplace: { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - return gsl_ran_laplace(rng, dfe_parameters_[1]) + dfe_parameters_[0]; + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + return gsl_ran_laplace(rng_gsl, dfe_parameters_[1]) + dfe_parameters_[0]; } case DFEType::kScript: diff --git a/core/population.cpp b/core/population.cpp index f153ef9df..d44437db2 100644 --- a/core/population.cpp +++ b/core/population.cpp @@ -273,7 +273,7 @@ Subpopulation *Population::AddSubpopulationSplit(slim_objectid_t p_subpop_id, Su species_.AboutToSplitSubpop(); } - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); for (slim_popsize_t parent_index = 0; parent_index < subpop.parent_subpop_size_; parent_index++) { @@ -290,13 +290,13 @@ Subpopulation *Population::AddSubpopulationSplit(slim_objectid_t p_subpop_id, Su if (species_.SexEnabled()) { if (parent_index < subpop.parent_first_male_index_) - migrant_index = p_source_subpop.DrawFemaleParentUsingFitness(rng); + migrant_index = p_source_subpop.DrawFemaleParentUsingFitness(rng_state); else - migrant_index = p_source_subpop.DrawMaleParentUsingFitness(rng); + migrant_index = p_source_subpop.DrawMaleParentUsingFitness(rng_state); } else { - migrant_index = p_source_subpop.DrawParentUsingFitness(rng); + migrant_index = p_source_subpop.DrawParentUsingFitness(rng_state); } // TREE SEQUENCE RECORDING @@ -988,8 +988,8 @@ slim_popsize_t Population::ApplyMateChoiceCallbacks(slim_popsize_t p_parent1_ind else if (positive_count <= weights_length / 4) // the threshold here is a guess { // there are just a few positive values, so try to be faster about scanning for them by checking for zero first - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - double the_rose_in_the_teeth = Eidos_rng_uniform_pos(rng) * weights_sum; + 
EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + double the_rose_in_the_teeth = Eidos_rng_uniform_doubleOO(rng_64) * weights_sum; double bachelor_sum = 0.0; for (slim_popsize_t weight_index = 0; weight_index < weights_length; ++weight_index) @@ -1011,8 +1011,8 @@ slim_popsize_t Population::ApplyMateChoiceCallbacks(slim_popsize_t p_parent1_ind else { // there are many positive values, so we need to do a uniform draw and see who gets the rose - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - double the_rose_in_the_teeth = Eidos_rng_uniform_pos(rng) * weights_sum; + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + double the_rose_in_the_teeth = Eidos_rng_uniform_doubleOO(rng_64) * weights_sum; double bachelor_sum = 0.0; for (slim_popsize_t weight_index = 0; weight_index < weights_length; ++weight_index) @@ -1057,9 +1057,9 @@ slim_popsize_t Population::ApplyMateChoiceCallbacks(slim_popsize_t p_parent1_ind #endif // The standard behavior, with no active callbacks, is to draw a male parent using the standard fitness values - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); - return (sex_enabled ? p_source_subpop->DrawMaleParentUsingFitness(rng) : p_source_subpop->DrawParentUsingFitness(rng)); + return (sex_enabled ? 
p_source_subpop->DrawMaleParentUsingFitness(rng_state) : p_source_subpop->DrawParentUsingFitness(rng_state)); } // apply modifyChild() callbacks to a generated child; a return of false means "do not use this child, generate a new one" @@ -1199,7 +1199,10 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice { THREAD_SAFETY_IN_ANY_PARALLEL("Population::EvolveSubpopulation(): usage of statics, probably many other issues"); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); // for use outside of parallel blocks + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = rng_state->pcg32_rng_; + EidosRNG_64_bit &rng_64 = rng_state->pcg64_rng_; + gsl_rng *rng_gsl = &rng_state->gsl_rng_; // for use outside of parallel blocks // determine the templated version of the Munge...() methods that we will call out to for reproduction // this is an optimization technique that lets us optimize away unused cruft at compile time @@ -1724,7 +1727,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice // CALLBACKS PRESENT: We need to generate offspring in a randomized order. This way the callbacks are presented with potential offspring // a random order, and so it is much easier to write a callback that runs for less than the full offspring generation phase (influencing a // limited number of mating events, for example). So in this code branch, we prepare an overall plan for migration and sex, and then execute - // that plan in an order randomized with Eidos_ran_shuffle(). BCH 28 September 2016: When sex is enabled, we want to generate male and female + // that plan in an order randomized with Eidos_ran_shuffle_uint32(). BCH 28 September 2016: When sex is enabled, we want to generate male and female // offspring in shuffled order. 
However, the vector of child individuals is organized into females first, then males, so we need to fill that // vector in an unshuffled order or we end up trying to generate a male offspring into a female slot, or vice versa. See the usage of // child_index_F, child_index_M, and child_index in the shuffle cases below. @@ -1804,16 +1807,16 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice if (fractions[2] < 0.0) EIDOS_TERMINATION << "ERROR (Population::EvolveSubpopulation): selfingRate + cloningRate > 1.0; cannot generate offspring satisfying constraints." << EidosTerminate(nullptr); - gsl_ran_multinomial(rng, 3, (unsigned int)migrants_to_generate, fractions, counts); + gsl_ran_multinomial(rng_gsl, 3, (unsigned int)migrants_to_generate, fractions, counts); number_to_self = static_cast(counts[0]); number_to_clone = static_cast(counts[1]); } else - number_to_self = static_cast(gsl_ran_binomial(rng, selfing_fraction, (unsigned int)migrants_to_generate)); + number_to_self = static_cast(gsl_ran_binomial(rng_gsl, selfing_fraction, (unsigned int)migrants_to_generate)); } else if (cloning_fraction > 0) - number_to_clone = static_cast(gsl_ran_binomial(rng, cloning_fraction, (unsigned int)migrants_to_generate)); + number_to_clone = static_cast(gsl_ran_binomial(rng_gsl, cloning_fraction, (unsigned int)migrants_to_generate)); // generate all selfed, cloned, and autogamous offspring in one shared loop slim_popsize_t migrant_count = 0; @@ -1849,7 +1852,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice } } - Eidos_ran_shuffle(rng, planned_offspring, total_children); + Eidos_ran_shuffle_uint32(rng_32, planned_offspring, total_children); // Now we can run through our plan vector and generate each planned child in order. 
slim_popsize_t child_index_F = 0, child_index_M = total_female_children, child_index; @@ -1903,21 +1906,21 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice { if (cloning_fraction > 0) { - double draw = Eidos_rng_uniform(rng); + double draw = Eidos_rng_uniform_doubleCO(rng_64); if (draw < selfing_fraction) selfed = true; else if (draw < selfing_fraction + cloning_fraction) cloned = true; } else { - double draw = Eidos_rng_uniform(rng); + double draw = Eidos_rng_uniform_doubleCO(rng_64); if (draw < selfing_fraction) selfed = true; } } else if (cloning_fraction > 0) { - double draw = Eidos_rng_uniform(rng); + double draw = Eidos_rng_uniform_doubleCO(rng_64); if (draw < cloning_fraction) cloned = true; } @@ -1934,9 +1937,9 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice slim_popsize_t parent1; if (sex_enabled) - parent1 = (child_sex == IndividualSex::kFemale) ? source_subpop.DrawFemaleParentUsingFitness(rng) : source_subpop.DrawMaleParentUsingFitness(rng); + parent1 = (child_sex == IndividualSex::kFemale) ? source_subpop.DrawFemaleParentUsingFitness(rng_state) : source_subpop.DrawMaleParentUsingFitness(rng_state); else - parent1 = source_subpop.DrawParentUsingFitness(rng); + parent1 = source_subpop.DrawParentUsingFitness(rng_state); slim_pedigreeid_t individual_pid = pedigrees_enabled ? 
SLiM_GetNextPedigreeID() : 0; Individual *new_child = p_subpop.child_individuals_[child_index]; @@ -1949,9 +1952,9 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice slim_popsize_t parent1; if (sex_enabled) - parent1 = source_subpop.DrawFemaleParentUsingFitness(rng); + parent1 = source_subpop.DrawFemaleParentUsingFitness(rng_state); else - parent1 = source_subpop.DrawParentUsingFitness(rng); + parent1 = source_subpop.DrawParentUsingFitness(rng_state); if (selfed) { @@ -1970,12 +1973,12 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice { if (sex_enabled) { - parent2 = source_subpop.DrawMaleParentUsingFitness(rng); + parent2 = source_subpop.DrawMaleParentUsingFitness(rng_state); } else { do - parent2 = source_subpop.DrawParentUsingFitness(rng); // selfing possible! + parent2 = source_subpop.DrawParentUsingFitness(rng_state); // selfing possible! while (prevent_incidental_selfing && (parent2 == parent1)); } } @@ -2020,12 +2023,12 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice { slim_popsize_t parent1, parent2; - parent1 = source_subpop.DrawParentUsingFitness(rng); + parent1 = source_subpop.DrawParentUsingFitness(rng_state); if (!mate_choice_callbacks) { do - parent2 = source_subpop.DrawParentUsingFitness(rng); // selfing possible! + parent2 = source_subpop.DrawParentUsingFitness(rng_state); // selfing possible! while (prevent_incidental_selfing && (parent2 == parent1)); } else @@ -2041,7 +2044,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice // parent1 was rejected by the callbacks, so we need to redraw a new parent1 num_tries++; - parent1 = source_subpop.DrawParentUsingFitness(rng); + parent1 = source_subpop.DrawParentUsingFitness(rng_state); if (num_tries > 1000000) EIDOS_TERMINATION << "ERROR (Population::EvolveSubpopulation): failed to generate child after 1 million attempts; terminating to avoid infinite loop." 
<< EidosTerminate(); @@ -2122,7 +2125,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice if (migrant_source_count == 0) num_migrants[0] = (unsigned int)total_children_of_sex; else - gsl_ran_multinomial(rng, migrant_source_count + 1, (unsigned int)total_children_of_sex, migration_rates, num_migrants); + gsl_ran_multinomial(rng_gsl, migrant_source_count + 1, (unsigned int)total_children_of_sex, migration_rates, num_migrants); // loop over all source subpops, including ourselves for (int pop_count = 0; pop_count < migrant_source_count + 1; ++pop_count) @@ -2148,16 +2151,16 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice if (fractions[2] < 0.0) EIDOS_TERMINATION << "ERROR (Population::EvolveSubpopulation): selfingRate + cloningRate > 1.0; cannot generate offspring satisfying constraints." << EidosTerminate(nullptr); - gsl_ran_multinomial(rng, 3, (unsigned int)migrants_to_generate, fractions, counts); + gsl_ran_multinomial(rng_gsl, 3, (unsigned int)migrants_to_generate, fractions, counts); number_to_self = static_cast(counts[0]); number_to_clone = static_cast(counts[1]); } else - number_to_self = static_cast(gsl_ran_binomial(rng, selfing_fraction, (unsigned int)migrants_to_generate)); + number_to_self = static_cast(gsl_ran_binomial(rng_gsl, selfing_fraction, (unsigned int)migrants_to_generate)); } else if (cloning_fraction > 0) - number_to_clone = static_cast(gsl_ran_binomial(rng, cloning_fraction, (unsigned int)migrants_to_generate)); + number_to_clone = static_cast(gsl_ran_binomial(rng_gsl, cloning_fraction, (unsigned int)migrants_to_generate)); // generate all selfed, cloned, and autogamous offspring in one shared loop slim_popsize_t migrant_count = 0; @@ -2195,7 +2198,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice } } - Eidos_ran_shuffle(rng, planned_offspring, total_children); + Eidos_ran_shuffle_uint32(rng_32, planned_offspring, total_children); // Now we 
can run through our plan vector and generate each planned child in order. slim_popsize_t child_index_F = 0, child_index_M = total_female_children, child_index; @@ -2264,21 +2267,21 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice { if (cloning_fraction > 0) { - double draw = Eidos_rng_uniform(rng); + double draw = Eidos_rng_uniform_doubleCO(rng_64); if (draw < selfing_fraction) selfed = true; else if (draw < selfing_fraction + cloning_fraction) cloned = true; } else { - double draw = Eidos_rng_uniform(rng); + double draw = Eidos_rng_uniform_doubleCO(rng_64); if (draw < selfing_fraction) selfed = true; } } else if (cloning_fraction > 0) { - double draw = Eidos_rng_uniform(rng); + double draw = Eidos_rng_uniform_doubleCO(rng_64); if (draw < cloning_fraction) cloned = true; } @@ -2295,9 +2298,9 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice slim_popsize_t parent1; if (sex_enabled) - parent1 = (child_sex == IndividualSex::kFemale) ? source_subpop->DrawFemaleParentUsingFitness(rng) : source_subpop->DrawMaleParentUsingFitness(rng); + parent1 = (child_sex == IndividualSex::kFemale) ? source_subpop->DrawFemaleParentUsingFitness(rng_state) : source_subpop->DrawMaleParentUsingFitness(rng_state); else - parent1 = source_subpop->DrawParentUsingFitness(rng); + parent1 = source_subpop->DrawParentUsingFitness(rng_state); slim_pedigreeid_t individual_pid = pedigrees_enabled ? 
SLiM_GetNextPedigreeID() : 0; @@ -2311,9 +2314,9 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice slim_popsize_t parent1; if (sex_enabled) - parent1 = source_subpop->DrawFemaleParentUsingFitness(rng); + parent1 = source_subpop->DrawFemaleParentUsingFitness(rng_state); else - parent1 = source_subpop->DrawParentUsingFitness(rng); + parent1 = source_subpop->DrawParentUsingFitness(rng_state); if (selfed) { @@ -2332,12 +2335,12 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice { if (sex_enabled) { - parent2 = source_subpop->DrawMaleParentUsingFitness(rng); + parent2 = source_subpop->DrawMaleParentUsingFitness(rng_state); } else { do - parent2 = source_subpop->DrawParentUsingFitness(rng); // selfing possible! + parent2 = source_subpop->DrawParentUsingFitness(rng_state); // selfing possible! while (prevent_incidental_selfing && (parent2 == parent1)); } } @@ -2370,7 +2373,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice // we need to even change the source subpop for our next attempt, so that differential mortality between different // migration sources leads to differential representation in the offspring generation – more offspring from the // subpop that is more successful at contributing migrants. 
- gsl_ran_multinomial(rng, migrant_source_count + 1, 1, migration_rates, num_migrants); + gsl_ran_multinomial(rng_gsl, migrant_source_count + 1, 1, migration_rates, num_migrants); for (int pop_count = 0; pop_count < migrant_source_count + 1; ++pop_count) if (num_migrants[pop_count] > 0) @@ -2429,7 +2432,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice if (migrant_source_count == 0) num_migrants[0] = (unsigned int)total_children_of_sex; else - gsl_ran_multinomial(rng, migrant_source_count + 1, (unsigned int)total_children_of_sex, migration_rates, num_migrants); + gsl_ran_multinomial(rng_gsl, migrant_source_count + 1, (unsigned int)total_children_of_sex, migration_rates, num_migrants); // loop over all source subpops, including ourselves for (int pop_count = 0; pop_count < migrant_source_count + 1; ++pop_count) @@ -2455,16 +2458,16 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice if (fractions[2] < 0.0) EIDOS_TERMINATION << "ERROR (Population::EvolveSubpopulation): selfingRate + cloningRate > 1.0; cannot generate offspring satisfying constraints." 
<< EidosTerminate(nullptr); - gsl_ran_multinomial(rng, 3, (unsigned int)migrants_to_generate, fractions, counts); + gsl_ran_multinomial(rng_gsl, 3, (unsigned int)migrants_to_generate, fractions, counts); number_to_self = static_cast(counts[0]); number_to_clone = static_cast(counts[1]); } else - number_to_self = static_cast(gsl_ran_binomial(rng, selfing_fraction, (unsigned int)migrants_to_generate)); + number_to_self = static_cast(gsl_ran_binomial(rng_gsl, selfing_fraction, (unsigned int)migrants_to_generate)); } else if (cloning_fraction > 0) - number_to_clone = static_cast(gsl_ran_binomial(rng, cloning_fraction, (unsigned int)migrants_to_generate)); + number_to_clone = static_cast(gsl_ran_binomial(rng_gsl, cloning_fraction, (unsigned int)migrants_to_generate)); // We get a whole block of pedigree IDs to use in the loop below, avoiding race conditions / locking // We are also going to use Individual objects from a block starting at base_child_count @@ -2519,13 +2522,13 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice EIDOS_THREAD_COUNT(gEidos_OMP_threads_WF_REPRO); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, migrants_to_generate, base_child_count, base_pedigree_id, pedigrees_enabled, p_subpop, source_subpop, child_sex, prevent_incidental_selfing) if(will_parallelize) num_threads(thread_count) { - gsl_rng *parallel_rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *parallel_rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1) for (slim_popsize_t migrant_count = 0; migrant_count < migrants_to_generate; migrant_count++) { - slim_popsize_t parent1 = source_subpop.DrawFemaleParentUsingFitness(parallel_rng); - slim_popsize_t parent2 = source_subpop.DrawMaleParentUsingFitness(parallel_rng); + slim_popsize_t parent1 = source_subpop.DrawFemaleParentUsingFitness(parallel_rng_state); + slim_popsize_t parent2 = source_subpop.DrawMaleParentUsingFitness(parallel_rng_state); 
slim_popsize_t this_child_index = base_child_count + migrant_count; Individual *new_child = p_subpop.child_individuals_[this_child_index]; @@ -2545,16 +2548,16 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice EIDOS_THREAD_COUNT(gEidos_OMP_threads_WF_REPRO); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, migrants_to_generate, base_child_count, base_pedigree_id, pedigrees_enabled, p_subpop, source_subpop, child_sex, prevent_incidental_selfing) if(will_parallelize) num_threads(thread_count) { - gsl_rng *parallel_rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *parallel_rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1) for (slim_popsize_t migrant_count = 0; migrant_count < migrants_to_generate; migrant_count++) { - slim_popsize_t parent1 = source_subpop.DrawParentUsingFitness(parallel_rng); + slim_popsize_t parent1 = source_subpop.DrawParentUsingFitness(parallel_rng_state); slim_popsize_t parent2; do - parent2 = source_subpop.DrawParentUsingFitness(parallel_rng); // note this does not prohibit selfing! + parent2 = source_subpop.DrawParentUsingFitness(parallel_rng_state); // note this does not prohibit selfing! 
while (prevent_incidental_selfing && (parent2 == parent1)); slim_popsize_t this_child_index = base_child_count + migrant_count; @@ -2576,7 +2579,7 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice EIDOS_THREAD_COUNT(gEidos_OMP_threads_WF_REPRO); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, migrants_to_generate, number_to_clone, number_to_self, base_child_count, base_pedigree_id, pedigrees_enabled, p_subpop, source_subpop, sex_enabled, child_sex, recording_tree_sequence, prevent_incidental_selfing) if(will_parallelize) num_threads(thread_count) { - gsl_rng *parallel_rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *parallel_rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1) for (slim_popsize_t migrant_count = 0; migrant_count < migrants_to_generate; migrant_count++) @@ -2586,9 +2589,9 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice slim_popsize_t parent1; if (sex_enabled) - parent1 = (child_sex == IndividualSex::kFemale) ? source_subpop.DrawFemaleParentUsingFitness(parallel_rng) : source_subpop.DrawMaleParentUsingFitness(parallel_rng); + parent1 = (child_sex == IndividualSex::kFemale) ? 
source_subpop.DrawFemaleParentUsingFitness(parallel_rng_state) : source_subpop.DrawMaleParentUsingFitness(parallel_rng_state); else - parent1 = source_subpop.DrawParentUsingFitness(parallel_rng); + parent1 = source_subpop.DrawParentUsingFitness(parallel_rng_state); slim_popsize_t this_child_index = base_child_count + migrant_count; Individual *new_child = p_subpop.child_individuals_[this_child_index]; @@ -2601,9 +2604,9 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice slim_popsize_t parent1; if (sex_enabled) - parent1 = source_subpop.DrawFemaleParentUsingFitness(parallel_rng); + parent1 = source_subpop.DrawFemaleParentUsingFitness(parallel_rng_state); else - parent1 = source_subpop.DrawParentUsingFitness(parallel_rng); + parent1 = source_subpop.DrawParentUsingFitness(parallel_rng_state); slim_popsize_t this_child_index = base_child_count + migrant_count; Individual *new_child = p_subpop.child_individuals_[this_child_index]; @@ -2619,12 +2622,12 @@ void Population::EvolveSubpopulation(Subpopulation &p_subpop, bool p_mate_choice if (sex_enabled) { - parent2 = source_subpop.DrawMaleParentUsingFitness(parallel_rng); + parent2 = source_subpop.DrawMaleParentUsingFitness(parallel_rng_state); } else { do - parent2 = source_subpop.DrawParentUsingFitness(parallel_rng); // selfing possible! + parent2 = source_subpop.DrawParentUsingFitness(parallel_rng_state); // selfing possible! 
while (prevent_incidental_selfing && (parent2 == parent1)); } @@ -4658,8 +4661,8 @@ void Population::DoHeteroduplexRepair(std::vector &p_heterodupl // and do all addition/removal in a single pass at the end of the process std::vector repair_removals; std::vector repair_additions; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = rng_state->pcg64_rng_; for (int heteroduplex_tract_index = 0; heteroduplex_tract_index < heteroduplex_tract_count; ++heteroduplex_tract_index) { @@ -4732,7 +4735,7 @@ void Population::DoHeteroduplexRepair(std::vector &p_heterodupl { // One nucleotide is A/T, the other is G/C, so GC bias is relevant here; // make a determination based on the assumption that the noncopy nucleotide is G/C - repair_toward_noncopy = (Eidos_rng_uniform(rng) <= gBGC_coeff_scaled); // 1.0 means always repair toward GC + repair_toward_noncopy = (Eidos_rng_uniform_doubleCO(rng_64) <= gBGC_coeff_scaled); // 1.0 means always repair toward GC // If the noncopy nucleotide is the A/T one, then our determination needs to be flipped if (noncopy_nuc_AT) @@ -4770,7 +4773,7 @@ void Population::DoHeteroduplexRepair(std::vector &p_heterodupl { // One nucleotide is A/T, the other is G/C, so GC bias is relevant here; // make a determination based on the assumption that the noncopy nucleotide is G/C - repair_toward_noncopy = (Eidos_rng_uniform(rng) <= gBGC_coeff_scaled); // 1.0 means always repair toward GC + repair_toward_noncopy = (Eidos_rng_uniform_doubleCO(rng_64) <= gBGC_coeff_scaled); // 1.0 means always repair toward GC // If the noncopy nucleotide is the A/T one, then our determination needs to be flipped if (noncopy_nuc_AT) @@ -4821,7 +4824,7 @@ void Population::DoHeteroduplexRepair(std::vector &p_heterodupl { // One nucleotide is A/T, the other is G/C, so GC bias is relevant here; // make a determination based on the assumption that the noncopy nucleotide is G/C - 
repair_toward_noncopy = (Eidos_rng_uniform(rng) <= gBGC_coeff_scaled); // 1.0 means always repair toward GC + repair_toward_noncopy = (Eidos_rng_uniform_doubleCO(rng_64) <= gBGC_coeff_scaled); // 1.0 means always repair toward GC // If the noncopy nucleotide is the A/T one, then our determination needs to be flipped if (noncopy_nuc_AT) @@ -8357,12 +8360,12 @@ void Population::PrintSample_SLiM(std::ostream &p_out, Subpopulation &p_subpop, // assemble a sample (with or without replacement) std::vector sample; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); for (slim_popsize_t s = 0; s < p_sample_size; s++) { // select a random haplosome (not a random individual) by selecting a random candidate entry - int candidate_index = static_cast(Eidos_rng_uniform_int(rng, (uint32_t)candidates.size())); + int candidate_index = static_cast(Eidos_rng_interval_uint32(rng_32, (uint32_t)candidates.size())); sample.emplace_back(candidates[candidate_index]); @@ -8411,12 +8414,12 @@ void Population::PrintSample_MS(std::ostream &p_out, Subpopulation &p_subpop, sl // assemble a sample (with or without replacement) std::vector sample; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); for (slim_popsize_t s = 0; s < p_sample_size; s++) { // select a random haplosome (not a random individual) by selecting a random candidate entry - int candidate_index = static_cast(Eidos_rng_uniform_int(rng, (uint32_t)candidates.size())); + int candidate_index = static_cast(Eidos_rng_interval_uint32(rng_32, (uint32_t)candidates.size())); sample.emplace_back(candidates[candidate_index]); @@ -8456,7 +8459,7 @@ void Population::PrintSample_VCF(std::ostream &p_out, Subpopulation &p_subpop, s // assemble a sample (with or without replacement) std::vector sample; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = 
EIDOS_32BIT_RNG(omp_get_thread_num()); int first_haplosome_index = species_.FirstHaplosomeIndices()[p_chromosome.Index()]; int last_haplosome_index = species_.LastHaplosomeIndices()[p_chromosome.Index()]; @@ -8464,7 +8467,7 @@ void Population::PrintSample_VCF(std::ostream &p_out, Subpopulation &p_subpop, s for (slim_popsize_t s = 0; s < p_sample_size; s++) { // select a random individual (not a random haplosome) by selecting a random candidate entry - int candidate_index = static_cast(Eidos_rng_uniform_int(rng, (uint32_t)candidates.size())); + int candidate_index = static_cast(Eidos_rng_interval_uint32(rng_32, (uint32_t)candidates.size())); Individual *ind = candidates[candidate_index]; // take all of its haplosomes for the chosen chromosome, including null haplosomes (needed as placeholders) diff --git a/core/slim_functions.cpp b/core/slim_functions.cpp index 607e2d2dd..970a10137 100644 --- a/core/slim_functions.cpp +++ b/core/slim_functions.cpp @@ -1849,12 +1849,12 @@ EidosValue_SP SLiM_ExecuteFunction_randomNucleotides(const std::vectorPushString(gStr_A); else if (runif < pC) string_result->PushString(gStr_C); @@ -1896,7 +1896,7 @@ EidosValue_SP SLiM_ExecuteFunction_randomNucleotides(const std::vectorset_int_no_check(0, value_index); else if (runif < pC) int_result->set_int_no_check(1, value_index); @@ -1918,7 +1918,7 @@ EidosValue_SP SLiM_ExecuteFunction_randomNucleotides(const std::vector 0.0", __LINE__); diff --git a/core/spatial_kernel.cpp b/core/spatial_kernel.cpp index 55332c5cd..4f64893d2 100644 --- a/core/spatial_kernel.cpp +++ b/core/spatial_kernel.cpp @@ -404,29 +404,33 @@ void SpatialKernel::DrawDisplacement_S1(double *displacement) { // Draw a displacement from the kernel center, weighted by kernel density // Note that we could be going either plus or minus from the center - Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); - gsl_rng *rng = rng_state->gsl_rng_; - switch (kernel_type_) { case SpatialKernelType::kFixed: { - 
displacement[0] = Eidos_rng_uniform(rng) * 2 * max_distance_ - max_distance_; + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + + displacement[0] = Eidos_rng_uniform_doubleCO(rng_64) * 2 * max_distance_ - max_distance_; return; } case SpatialKernelType::kLinear: { - double d = (1 - sqrt(Eidos_rng_uniform(rng))) * max_distance_; + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = rng_state->pcg64_rng_; + + double d = (1 - sqrt(Eidos_rng_uniform_doubleCO(rng_64))) * max_distance_; displacement[0] = (Eidos_RandomBool(rng_state) ? d : -d); return; } case SpatialKernelType::kExponential: { + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = &rng_state->gsl_rng_; double d; do { - d = gsl_ran_exponential(rng, 1.0 / kernel_param2_); + d = gsl_ran_exponential(rng_gsl, 1.0 / kernel_param2_); } while (d > max_distance_); displacement[0] = (Eidos_RandomBool(rng_state) ? d : -d); @@ -434,10 +438,11 @@ void SpatialKernel::DrawDisplacement_S1(double *displacement) } case SpatialKernelType::kNormal: { + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); double d; do { - d = gsl_ran_gaussian(rng, kernel_param2_); + d = gsl_ran_gaussian(rng_gsl, kernel_param2_); } while (d > max_distance_); displacement[0] = d; @@ -445,10 +450,11 @@ void SpatialKernel::DrawDisplacement_S1(double *displacement) } case SpatialKernelType::kStudentsT: { + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); double d; do { - d = gsl_ran_tdist(rng, kernel_param2_) * kernel_param3_; + d = gsl_ran_tdist(rng_gsl, kernel_param2_) * kernel_param3_; } while (d > max_distance_); displacement[0] = d; @@ -467,35 +473,40 @@ void SpatialKernel::DrawDisplacement_S2(double *displacement) { // Draw a displacement from the kernel center, weighted by kernel density // Note that we could be going in any direction from the center - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - switch (kernel_type_) { case 
SpatialKernelType::kFixed: { - double theta = Eidos_rng_uniform(rng) * 2 * M_PI; - double d = sqrt(Eidos_rng_uniform(rng)) * max_distance_; + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + double theta = Eidos_rng_uniform_doubleCO(rng_64) * 2 * M_PI; + double d = sqrt(Eidos_rng_uniform_doubleCO(rng_64)) * max_distance_; displacement[0] = cos(theta) * d; displacement[1] = sin(theta) * d; return; } case SpatialKernelType::kLinear: { - double theta = Eidos_rng_uniform(rng) * 2 * M_PI; - double d = gsl_ran_beta(rng, 2.0, 2.0) * max_distance_; + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = &rng_state->gsl_rng_; + EidosRNG_64_bit &rng_64 = rng_state->pcg64_rng_; + double theta = Eidos_rng_uniform_doubleCO(rng_64) * 2 * M_PI; + double d = gsl_ran_beta(rng_gsl, 2.0, 2.0) * max_distance_; displacement[0] = cos(theta) * d; displacement[1] = sin(theta) * d; return; } case SpatialKernelType::kExponential: { + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = &rng_state->gsl_rng_; + EidosRNG_64_bit &rng_64 = rng_state->pcg64_rng_; double d; do { - d = gsl_ran_gamma(rng, 2.0, 1.0 / kernel_param2_); + d = gsl_ran_gamma(rng_gsl, 2.0, 1.0 / kernel_param2_); } while (d > max_distance_); - double theta = Eidos_rng_uniform(rng) * 2 * M_PI; + double theta = Eidos_rng_uniform_doubleCO(rng_64) * 2 * M_PI; displacement[0] = cos(theta) * d; displacement[1] = sin(theta) * d; @@ -503,11 +514,12 @@ void SpatialKernel::DrawDisplacement_S2(double *displacement) } case SpatialKernelType::kNormal: { + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); double d1, d2; do { - d1 = gsl_ran_gaussian(rng, kernel_param2_); - d2 = gsl_ran_gaussian(rng, kernel_param2_); + d1 = gsl_ran_gaussian(rng_gsl, kernel_param2_); + d2 = gsl_ran_gaussian(rng_gsl, kernel_param2_); } while (sqrt(d1*d1 + d2*d2) > max_distance_); displacement[0] = d1; @@ -517,14 +529,15 @@ void 
SpatialKernel::DrawDisplacement_S2(double *displacement) case SpatialKernelType::kStudentsT: { // df (nu) is kernel_param2_, scale is kernel_param3_ + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double d; do { - double x = 0.5 + abs(Eidos_rng_uniform(rng) - 0.5); + double x = 0.5 + abs(Eidos_rng_uniform_doubleCO(rng_64) - 0.5); d = sqrt(std::max(0.0, kernel_param2_ * (pow(2.0 - 2.0 * x, -2.0 / (kernel_param2_ - 1.0)) - 1.0))) * kernel_param3_; } while (d > max_distance_); - double theta = Eidos_rng_uniform(rng) * 2 * M_PI; + double theta = Eidos_rng_uniform_doubleCO(rng_64) * 2 * M_PI; displacement[0] = cos(theta) * d; displacement[1] = sin(theta) * d; @@ -543,17 +556,18 @@ void SpatialKernel::DrawDisplacement_S3(double *displacement) { // Draw a displacement from the kernel center, weighted by kernel density // Note that we could be going in any direction from the center - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); switch (kernel_type_) { case SpatialKernelType::kFixed: { - double dx = gsl_ran_gaussian(rng, 1.0); - double dy = gsl_ran_gaussian(rng, 1.0); - double dz = gsl_ran_gaussian(rng, 1.0); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + double dx = gsl_ran_gaussian(rng_gsl, 1.0); + double dy = gsl_ran_gaussian(rng_gsl, 1.0); + double dz = gsl_ran_gaussian(rng_gsl, 1.0); double sphere_dist = sqrt(dx*dx + dy*dy + dz*dz); - double d = pow(Eidos_rng_uniform(rng), 1/3.0) * max_distance_; + double d = pow(Eidos_rng_uniform_doubleCO(rng_64), 1/3.0) * max_distance_; displacement[0] = dx * d / sphere_dist; displacement[1] = dy * d / sphere_dist; @@ -562,11 +576,11 @@ void SpatialKernel::DrawDisplacement_S3(double *displacement) } case SpatialKernelType::kLinear: { - double dx = gsl_ran_gaussian(rng, 1.0); - double dy = gsl_ran_gaussian(rng, 1.0); - double dz = gsl_ran_gaussian(rng, 1.0); + double dx = gsl_ran_gaussian(rng_gsl, 1.0); + double dy = 
gsl_ran_gaussian(rng_gsl, 1.0); + double dz = gsl_ran_gaussian(rng_gsl, 1.0); double sphere_dist = sqrt(dx*dx + dy*dy + dz*dz); - double d = gsl_ran_beta(rng, 3.0, 2.0) * max_distance_; + double d = gsl_ran_beta(rng_gsl, 3.0, 2.0) * max_distance_; displacement[0] = dx * d / sphere_dist; displacement[1] = dy * d / sphere_dist; @@ -575,14 +589,14 @@ void SpatialKernel::DrawDisplacement_S3(double *displacement) } case SpatialKernelType::kExponential: { - double dx = gsl_ran_gaussian(rng, 1.0); - double dy = gsl_ran_gaussian(rng, 1.0); - double dz = gsl_ran_gaussian(rng, 1.0); + double dx = gsl_ran_gaussian(rng_gsl, 1.0); + double dy = gsl_ran_gaussian(rng_gsl, 1.0); + double dz = gsl_ran_gaussian(rng_gsl, 1.0); double sphere_dist = sqrt(dx*dx + dy*dy + dz*dz); double d; do { - d = gsl_ran_gamma(rng, 3.0, 1.0 / kernel_param2_); + d = gsl_ran_gamma(rng_gsl, 3.0, 1.0 / kernel_param2_); } while (d > max_distance_); displacement[0] = dx * d / sphere_dist; @@ -595,9 +609,9 @@ void SpatialKernel::DrawDisplacement_S3(double *displacement) double d1, d2, d3; do { - d1 = gsl_ran_gaussian(rng, kernel_param2_); - d2 = gsl_ran_gaussian(rng, kernel_param2_); - d3 = gsl_ran_gaussian(rng, kernel_param2_); + d1 = gsl_ran_gaussian(rng_gsl, kernel_param2_); + d2 = gsl_ran_gaussian(rng_gsl, kernel_param2_); + d3 = gsl_ran_gaussian(rng_gsl, kernel_param2_); } while (sqrt(d1*d1 + d2*d2 + d3*d3) > max_distance_); displacement[0] = d1; diff --git a/core/spatial_map.cpp b/core/spatial_map.cpp index 8b8453b60..c05cd327e 100644 --- a/core/spatial_map.cpp +++ b/core/spatial_map.cpp @@ -2604,7 +2604,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleImprovedNearbyPoint(EidosGlobalStr EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(coordinate_count); double *result_ptr = float_result->data_mutable(); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); if 
(spatiality_ == 1) { @@ -2649,7 +2649,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleImprovedNearbyPoint(EidosGlobalStr double original_map_value = ValueAtPoint_S1(rescaled_point); double map_value = ValueAtPoint_S1(rescaled_displaced); - if ((map_value > original_map_value) || (map_value > original_map_value * Eidos_rng_uniform(rng))) + if ((map_value > original_map_value) || (map_value > original_map_value * Eidos_rng_uniform_doubleCO(rng_64))) *(result_ptr++) = displaced_point[0]; else *(result_ptr++) = point_a; @@ -2709,7 +2709,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleImprovedNearbyPoint(EidosGlobalStr double original_map_value = ValueAtPoint_S2(rescaled_point); double map_value = ValueAtPoint_S2(rescaled_displaced); - if ((map_value > original_map_value) || (map_value > original_map_value * Eidos_rng_uniform(rng))) + if ((map_value > original_map_value) || (map_value > original_map_value * Eidos_rng_uniform_doubleCO(rng_64))) { *(result_ptr++) = displaced_point[0]; *(result_ptr++) = displaced_point[1]; @@ -2786,7 +2786,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleImprovedNearbyPoint(EidosGlobalStr double original_map_value = ValueAtPoint_S3(rescaled_point); double map_value = ValueAtPoint_S3(rescaled_displaced); - if ((map_value > original_map_value) || (map_value > original_map_value * Eidos_rng_uniform(rng))) + if ((map_value > original_map_value) || (map_value > original_map_value * Eidos_rng_uniform_doubleCO(rng_64))) { *(result_ptr++) = displaced_point[0]; *(result_ptr++) = displaced_point[1]; @@ -2845,7 +2845,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleNearbyPoint(EidosGlobalStringID p_ EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(coordinate_count); double *result_ptr = float_result->data_mutable(); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); if (spatiality_ == 1) { @@ -2892,7 
+2892,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleNearbyPoint(EidosGlobalStringID p_ rescaled_point[0] = (displaced_point[0] - bounds_a0_) / (bounds_a1_ - bounds_a0_); map_value = ValueAtPoint_S1(rescaled_point); } - while (values_max_ * Eidos_rng_uniform(rng) > map_value); + while (values_max_ * Eidos_rng_uniform_doubleCO(rng_64) > map_value); *(result_ptr++) = displaced_point[0]; } @@ -2952,7 +2952,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleNearbyPoint(EidosGlobalStringID p_ rescaled_point[1] = (displaced_point[1] - bounds_b0_) / (bounds_b1_ - bounds_b0_); map_value = ValueAtPoint_S2(rescaled_point); } - while (values_max_ * Eidos_rng_uniform(rng) > map_value); + while (values_max_ * Eidos_rng_uniform_doubleCO(rng_64) > map_value); *(result_ptr++) = displaced_point[0]; *(result_ptr++) = displaced_point[1]; @@ -3023,7 +3023,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_sampleNearbyPoint(EidosGlobalStringID p_ rescaled_point[2] = (displaced_point[2] - bounds_c0_) / (bounds_c1_ - bounds_c0_); map_value = ValueAtPoint_S3(rescaled_point); } - while (values_max_ * Eidos_rng_uniform(rng) > map_value); + while (values_max_ * Eidos_rng_uniform_doubleCO(rng_64) > map_value); *(result_ptr++) = displaced_point[0]; *(result_ptr++) = displaced_point[1]; diff --git a/core/species.cpp b/core/species.cpp index 3f8013fdd..cc3ee742d 100644 --- a/core/species.cpp +++ b/core/species.cpp @@ -4362,8 +4362,8 @@ slim_popsize_t *Species::BorrowShuffleBuffer(slim_popsize_t p_buffer_size) if (shuffle_buf_size_ > 0) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - Eidos_ran_shuffle(rng, buffer_contents, shuffle_buf_size_); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); + Eidos_ran_shuffle_uint32(rng_32, buffer_contents, shuffle_buf_size_); } } else diff --git a/core/subpopulation.cpp b/core/subpopulation.cpp index bb0a5bf0a..2f3ffb1e7 100644 --- a/core/subpopulation.cpp +++ b/core/subpopulation.cpp @@ -6055,7 +6055,7 @@ void 
Subpopulation::ViabilitySurvival(std::vector &p_survival_c #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, survival_buffer, parent_subpop_size_) firstprivate(individual_data) if(parent_subpop_size_ >= EIDOS_OMPMIN_SURVIVAL) num_threads(thread_count) { uint8_t *survival_buf_perthread = survival_buffer; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1024) nowait for (int individual_index = 0; individual_index < parent_subpop_size_; ++individual_index) @@ -6066,7 +6066,7 @@ void Subpopulation::ViabilitySurvival(std::vector &p_survival_c if (fitness <= 0.0) survived = false; else if (fitness >= 1.0) survived = true; - else survived = (Eidos_rng_uniform(rng) < fitness); + else survived = (Eidos_rng_uniform_doubleCO(rng_64) < fitness); survival_buf_perthread[individual_index] = survived; } @@ -6077,14 +6077,14 @@ void Subpopulation::ViabilitySurvival(std::vector &p_survival_c { // this is the complex case with callbacks, and therefore a shuffle buffer to randomize processing order slim_popsize_t *shuffle_buf = species_.BorrowShuffleBuffer(parent_subpop_size_); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); for (slim_popsize_t shuffle_index = 0; shuffle_index < parent_subpop_size_; shuffle_index++) { slim_popsize_t individual_index = shuffle_buf[shuffle_index]; Individual *individual = individual_data[individual_index]; double fitness = individual->cached_fitness_UNSAFE_; // never overridden in nonWF models, so this is safe with no check - double draw = Eidos_rng_uniform(rng); // always need a draw to pass to the callback + double draw = Eidos_rng_uniform_doubleCO(rng_64); // always need a draw to pass to the callback uint8_t survived = (draw < fitness); // run the survival() callbacks to allow the above decision to be modified @@ -6696,9 +6696,9 @@ IndividualSex 
Subpopulation::_ValidateHaplosomesAndChooseSex(ChromosomeType p_ch if ((sex_prob >= 0.0) && (sex_prob <= 1.0)) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); - offspring_sex = ((Eidos_rng_uniform(rng) < sex_prob) ? IndividualSex::kMale : IndividualSex::kFemale); + offspring_sex = ((Eidos_rng_uniform_doubleCO(rng_64) < sex_prob) ? IndividualSex::kMale : IndividualSex::kFemale); } else EIDOS_TERMINATION << "ERROR (Subpopulation::_ValidateHaplosomesAndChooseSex): probability " << sex_prob << " out of range [0.0, 1.0] for parameter sex passed to " << p_caller_name << "." << EidosTerminate(); @@ -6868,9 +6868,9 @@ IndividualSex Subpopulation::_SexForSexValue(EidosValue *p_sex_value, bool p_sex if (sex_value_type == EidosValueType::kValueNULL) { // in sexual simulations, NULL (the default) means pick a sex with equal probability - Eidos_RNG_State *rng = EIDOS_STATE_RNG(omp_get_thread_num()); + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); - sex = (Eidos_RandomBool(rng) ? IndividualSex::kMale : IndividualSex::kFemale); + sex = (Eidos_RandomBool(rng_state) ? IndividualSex::kMale : IndividualSex::kFemale); } else if (sex_value_type == EidosValueType::kValueString) { @@ -6890,9 +6890,9 @@ IndividualSex Subpopulation::_SexForSexValue(EidosValue *p_sex_value, bool p_sex if ((sex_prob >= 0.0) && (sex_prob <= 1.0)) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); - sex = ((Eidos_rng_uniform(rng) < sex_prob) ? IndividualSex::kMale : IndividualSex::kFemale); + sex = ((Eidos_rng_uniform_doubleCO(rng_64) < sex_prob) ? IndividualSex::kMale : IndividualSex::kFemale); } else EIDOS_TERMINATION << "ERROR (Subpopulation::HaplosomeConfigurationForSex): probability " << sex_prob << " out of range [0.0, 1.0] for parameter sex." 
<< EidosTerminate(); @@ -9086,7 +9086,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositions(EidosGlobalStringID // are common, though. if ((kernel_count == 1) && (dimensionality == 2) && (kernel0.kernel_type_ == SpatialKernelType::kNormal) && std::isinf(kernel0.max_distance_) && ((boundary == BoundaryCondition::kStopping) || (boundary == BoundaryCondition::kReflecting) || (boundary == BoundaryCondition::kReprising) || (boundary == BoundaryCondition::kAbsorbing) || ((boundary == BoundaryCondition::kPeriodic) && periodic_x && periodic_y))) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); double stddev = kernel0.kernel_param2_; double bx0 = bounds_x0_, bx1 = bounds_x1_; double by0 = bounds_y0_, by1 = bounds_y1_; @@ -9097,8 +9097,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositions(EidosGlobalStringID for (int individual_index = 0; individual_index < individuals_count; ++individual_index) { Individual *ind = individuals[individual_index]; - double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng, stddev); - double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng, stddev); + double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng_gsl, stddev); a0 = std::max(bx0, std::min(bx1, a0)); a1 = std::max(by0, std::min(by1, a1)); @@ -9113,8 +9113,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositions(EidosGlobalStringID for (int individual_index = 0; individual_index < individuals_count; ++individual_index) { Individual *ind = individuals[individual_index]; - double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng, stddev); - double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng, stddev); + double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng_gsl, stddev); while (true) { @@ -9143,8 +9143,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositions(EidosGlobalStringID 
double a1_original = ind->spatial_y_; reprise_specialcase: - double a0 = a0_original + gsl_ran_gaussian(rng, stddev); - double a1 = a1_original + gsl_ran_gaussian(rng, stddev); + double a0 = a0_original + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = a1_original + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -9160,8 +9160,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositions(EidosGlobalStringID for (int individual_index = 0; individual_index < individuals_count; ++individual_index) { Individual *ind = individuals[individual_index]; - double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng, stddev); - double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng, stddev); + double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -9177,8 +9177,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositions(EidosGlobalStringID for (int individual_index = 0; individual_index < individuals_count; ++individual_index) { Individual *ind = individuals[individual_index]; - double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng, stddev); - double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng, stddev); + double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng_gsl, stddev); // (note periodic_x and periodic_y are required to be true above) while (a0 < 0.0) a0 += bx1; @@ -9548,7 +9548,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt // which is pretty marginal, but it's an easy optimization. 
if ((kernel_count == 1) && (dimensionality == 2) && (kernel0.kernel_type_ == SpatialKernelType::kNormal) && std::isinf(kernel0.max_distance_) && !periodic_x && !periodic_y && !periodic_z && ((boundary == BoundaryCondition::kReprising) || (boundary == BoundaryCondition::kAbsorbing))) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double stddev = kernel0.kernel_param2_; double bx0 = bounds_x0_, bx1 = bounds_x1_; double by0 = bounds_y0_, by1 = bounds_y1_; @@ -9571,8 +9572,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt if (++num_tries == 1000000) EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_deviatePositionsWithMap): deviatePositionsWithMap() failed to find a successful deviated point by reprising after 1 million attempts; terminating to avoid infinite loop." << EidosTerminate(); - double a0 = a0_original + gsl_ran_gaussian(rng, stddev); - double a1 = a1_original + gsl_ran_gaussian(rng, stddev); + double a0 = a0_original + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = a1_original + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -9598,7 +9599,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) goto reprise_specialcase; } @@ -9620,8 +9621,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt if (++num_tries == 1000000) EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_deviatePositionsWithMap): deviatePositionsWithMap() failed to find a successful deviated point by reprising after 1 million attempts; terminating to avoid infinite loop." 
<< EidosTerminate(); - double a0 = a0_original + gsl_ran_gaussian(rng, stddev); - double a1 = a1_original + gsl_ran_gaussian(rng, stddev); + double a0 = a0_original + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = a1_original + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -9644,7 +9645,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) goto reprise_specialcase_nointerp; } @@ -9661,8 +9662,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt for (int individual_index = 0; individual_index < individuals_count; ++individual_index) { Individual *ind = individuals[individual_index]; - double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng, stddev); - double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng, stddev); + double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -9691,7 +9692,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) result->push_object_element_capcheck_NORR(ind); } } @@ -9706,8 +9707,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt for (int individual_index = 0; individual_index < individuals_count; ++individual_index) { Individual *ind = individuals[individual_index]; - double a0 = ind->spatial_x_ + gsl_ran_gaussian(rng, stddev); - double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng, stddev); + double a0 = ind->spatial_x_ + 
gsl_ran_gaussian(rng_gsl, stddev); + double a1 = ind->spatial_y_ + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -9733,7 +9734,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) result->push_object_element_capcheck_NORR(ind); } } @@ -9747,7 +9748,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt } // main code path; note that here we may have multiple kernels defined, one per individual - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); switch (dimensionality) { @@ -9804,7 +9805,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) goto reprise_1; } @@ -9837,7 +9838,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) result->push_object_element_capcheck_NORR(ind); } } @@ -9918,7 +9919,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) goto reprise_2; } @@ -9953,7 +9954,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number 
draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) result->push_object_element_capcheck_NORR(ind); } } @@ -10045,7 +10046,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) goto reprise_3; } @@ -10082,7 +10083,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_deviatePositionsWithMap(EidosGlobalSt else { // intermediate: do a random number draw, where value_for_point is P(within bounds) - if (Eidos_rng_uniform(rng) > value_for_point) + if (Eidos_rng_uniform_doubleCO(rng_64) > value_for_point) result->push_object_element_capcheck_NORR(ind); } } @@ -10202,7 +10203,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointDeviated(EidosGlobalStringID p_m // are common, though. 
if ((kernel_count == 1) && (dimensionality == 2) && (kernel0.kernel_type_ == SpatialKernelType::kNormal) && std::isinf(kernel0.max_distance_) && ((boundary == BoundaryCondition::kStopping) || (boundary == BoundaryCondition::kReflecting) || (boundary == BoundaryCondition::kReprising) || ((boundary == BoundaryCondition::kPeriodic) && periodic_x && periodic_y))) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); double stddev = kernel0.kernel_param2_; double bx0 = bounds_x0_, bx1 = bounds_x1_; double by0 = bounds_y0_, by1 = bounds_y1_; @@ -10211,8 +10212,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointDeviated(EidosGlobalStringID p_m { for (int result_index = 0; result_index < n; ++result_index) { - double a0 = *(point_buf_ptr++) + gsl_ran_gaussian(rng, stddev); - double a1 = *(point_buf_ptr++) + gsl_ran_gaussian(rng, stddev); + double a0 = *(point_buf_ptr++) + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = *(point_buf_ptr++) + gsl_ran_gaussian(rng_gsl, stddev); a0 = std::max(bx0, std::min(bx1, a0)); a1 = std::max(by0, std::min(by1, a1)); @@ -10225,8 +10226,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointDeviated(EidosGlobalStringID p_m { for (int result_index = 0; result_index < n; ++result_index) { - double a0 = *(point_buf_ptr++) + gsl_ran_gaussian(rng, stddev); - double a1 = *(point_buf_ptr++) + gsl_ran_gaussian(rng, stddev); + double a0 = *(point_buf_ptr++) + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = *(point_buf_ptr++) + gsl_ran_gaussian(rng_gsl, stddev); while (true) { @@ -10253,8 +10254,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointDeviated(EidosGlobalStringID p_m double a1_original = *(point_buf_ptr++); reprise_specialcase: - double a0 = a0_original + gsl_ran_gaussian(rng, stddev); - double a1 = a1_original + gsl_ran_gaussian(rng, stddev); + double a0 = a0_original + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = a1_original + gsl_ran_gaussian(rng_gsl, stddev); if ((a0 < 
bx0) || (a0 > bx1) || (a1 < by0) || (a1 > by1)) @@ -10268,8 +10269,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointDeviated(EidosGlobalStringID p_m { for (int result_index = 0; result_index < n; ++result_index) { - double a0 = *(point_buf_ptr++) + gsl_ran_gaussian(rng, stddev); - double a1 = *(point_buf_ptr++) + gsl_ran_gaussian(rng, stddev); + double a0 = *(point_buf_ptr++) + gsl_ran_gaussian(rng_gsl, stddev); + double a1 = *(point_buf_ptr++) + gsl_ran_gaussian(rng_gsl, stddev); // (note periodic_x and periodic_y are required to be true above) while (a0 < 0.0) a0 += bx1; @@ -10955,13 +10956,13 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_UNIFORM_1D); #pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM_1D) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; #pragma omp for schedule(static) for (int64_t point_index = 0; point_index < point_count; ++point_index) { - float_result_data[point_index] = Eidos_rng_uniform(rng) * xsize + xbase; + float_result_data[point_index] = Eidos_rng_uniform_doubleCO(rng_64) * xsize + xbase; } } break; @@ -10971,15 +10972,15 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_UNIFORM_2D); #pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM_2D) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; double ysize = bounds_y1_ - bounds_y0_, ybase = bounds_y0_; #pragma omp 
for schedule(static) for (int64_t point_index = 0; point_index < point_count; ++point_index) { - float_result_data[point_index * 2] = Eidos_rng_uniform(rng) * xsize + xbase; - float_result_data[point_index * 2 + 1] = Eidos_rng_uniform(rng) * ysize + ybase; + float_result_data[point_index * 2] = Eidos_rng_uniform_doubleCO(rng_64) * xsize + xbase; + float_result_data[point_index * 2 + 1] = Eidos_rng_uniform_doubleCO(rng_64) * ysize + ybase; } } break; @@ -10989,7 +10990,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me EIDOS_THREAD_COUNT(gEidos_OMP_threads_POINT_UNIFORM_3D); #pragma omp parallel default(none) shared(point_count, gEidos_RNG_PERTHREAD) firstprivate(float_result_data) if(point_count >= EIDOS_OMPMIN_POINT_UNIFORM_3D) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; double ysize = bounds_y1_ - bounds_y0_, ybase = bounds_y0_; double zsize = bounds_z1_ - bounds_z0_, zbase = bounds_z0_; @@ -10997,9 +10998,9 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniform(EidosGlobalStringID p_me #pragma omp for schedule(static) for (int64_t point_index = 0; point_index < point_count; ++point_index) { - float_result_data[point_index * 3] = Eidos_rng_uniform(rng) * xsize + xbase; - float_result_data[point_index * 3 + 1] = Eidos_rng_uniform(rng) * ysize + ybase; - float_result_data[point_index * 3 + 2] = Eidos_rng_uniform(rng) * zsize + zbase; + float_result_data[point_index * 3] = Eidos_rng_uniform_doubleCO(rng_64) * xsize + xbase; + float_result_data[point_index * 3 + 1] = Eidos_rng_uniform_doubleCO(rng_64) * ysize + ybase; + float_result_data[point_index * 3 + 2] = Eidos_rng_uniform_doubleCO(rng_64) * zsize + zbase; } } break; @@ -11084,7 +11085,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniformWithMap(EidosGlobalString case 1: { { - gsl_rng *rng = 
EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; for (int64_t point_index = 0; point_index < point_count; ++point_index) @@ -11097,14 +11098,14 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniformWithMap(EidosGlobalString EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_pointUniformWithMap): pointUniformWithMap() failed to find a successful drawn point after 1 million attempts; terminating to avoid infinite loop." << EidosTerminate(); // ValueAtPoint_S1() requires points normalized to [0, 1] in the map's spatiality - point_base[0] = Eidos_rng_uniform(rng); + point_base[0] = Eidos_rng_uniform_doubleCO(rng_64); double value_for_point = map->ValueAtPoint_S1(point_base); if (value_for_point <= 0) continue; else if (value_for_point >= 1) break; - else if (Eidos_rng_uniform(rng) <= value_for_point) + else if (Eidos_rng_uniform_doubleCO(rng_64) <= value_for_point) break; } while (true); @@ -11116,7 +11117,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniformWithMap(EidosGlobalString case 2: { { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; double ysize = bounds_y1_ - bounds_y0_, ybase = bounds_y0_; @@ -11130,15 +11131,15 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniformWithMap(EidosGlobalString EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_pointUniformWithMap): pointUniformWithMap() failed to find a successful drawn point after 1 million attempts; terminating to avoid infinite loop." 
<< EidosTerminate(); // ValueAtPoint_S2() requires points normalized to [0, 1] in the map's spatiality - point_base[0] = Eidos_rng_uniform(rng); - point_base[1] = Eidos_rng_uniform(rng); + point_base[0] = Eidos_rng_uniform_doubleCO(rng_64); + point_base[1] = Eidos_rng_uniform_doubleCO(rng_64); double value_for_point = map->ValueAtPoint_S2(point_base); if (value_for_point <= 0) continue; else if (value_for_point >= 1) break; - else if (Eidos_rng_uniform(rng) <= value_for_point) + else if (Eidos_rng_uniform_doubleCO(rng_64) <= value_for_point) break; } while (true); @@ -11151,7 +11152,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniformWithMap(EidosGlobalString case 3: { { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); double xsize = bounds_x1_ - bounds_x0_, xbase = bounds_x0_; double ysize = bounds_y1_ - bounds_y0_, ybase = bounds_y0_; double zsize = bounds_z1_ - bounds_z0_, zbase = bounds_z0_; @@ -11166,16 +11167,16 @@ EidosValue_SP Subpopulation::ExecuteMethod_pointUniformWithMap(EidosGlobalString EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_pointUniformWithMap): pointUniformWithMap() failed to find a successful drawn point after 1 million attempts; terminating to avoid infinite loop." 
<< EidosTerminate(); // ValueAtPoint_S3() requires points normalized to [0, 1] in the map's spatiality - point_base[0] = Eidos_rng_uniform(rng); - point_base[1] = Eidos_rng_uniform(rng); - point_base[2] = Eidos_rng_uniform(rng); + point_base[0] = Eidos_rng_uniform_doubleCO(rng_64); + point_base[1] = Eidos_rng_uniform_doubleCO(rng_64); + point_base[2] = Eidos_rng_uniform_doubleCO(rng_64); double value_for_point = map->ValueAtPoint_S3(point_base); if (value_for_point <= 0) continue; else if (value_for_point >= 1) break; - else if (Eidos_rng_uniform(rng) <= value_for_point) + else if (Eidos_rng_uniform_doubleCO(rng_64) <= value_for_point) break; } while (true); @@ -11595,8 +11596,8 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID if (sample_size == 1) { // a sample size of 1 is very common; make it as fast as we can by getting a singleton EidosValue directly from x - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); - int sample_index = (int)Eidos_rng_uniform_int(rng, candidate_count) + first_candidate_index; + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); + int sample_index = (int)Eidos_rng_interval_uint32(rng_32, candidate_count) + first_candidate_index; if ((excluded_index != -1) && (sample_index >= excluded_index)) sample_index++; @@ -11613,12 +11614,12 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_INDIVIDUALS_1); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(candidate_count, first_candidate_index, excluded_index, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_1) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int sample_index = 
(int)Eidos_rng_uniform_int(rng, candidate_count) + first_candidate_index; + int sample_index = (int)Eidos_rng_interval_uint32(rng_32, candidate_count) + first_candidate_index; if ((excluded_index != -1) && (sample_index >= excluded_index)) sample_index++; @@ -11646,9 +11647,9 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID // note that the code above guarantees that here there are at least two candidates to draw result_SP = EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Object(gSLiM_Individual_Class)); EidosValue_Object *result = ((EidosValue_Object *)result_SP.get())->resize_no_initialize(sample_size); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); - int sample_index1 = (int)Eidos_rng_uniform_int(rng, candidate_count) + first_candidate_index; + int sample_index1 = (int)Eidos_rng_interval_uint32(rng_32, candidate_count) + first_candidate_index; if ((excluded_index != -1) && (sample_index1 >= excluded_index)) sample_index1++; @@ -11659,7 +11660,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID do { - sample_index2 = (int)Eidos_rng_uniform_int(rng, candidate_count) + first_candidate_index; + sample_index2 = (int)Eidos_rng_interval_uint32(rng_32, candidate_count) + first_candidate_index; if ((excluded_index != -1) && (sample_index2 >= excluded_index)) sample_index2++; @@ -11683,11 +11684,11 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID // the number of candidates versus the number of *valid* candidates, and there's no way to know. 
if ((sample_size == 1) && (candidate_count >= 30)) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); for (int try_count = 0; try_count < 20; ++try_count) { - int sample_index = (int)Eidos_rng_uniform_int(rng, candidate_count) + first_candidate_index; + int sample_index = (int)Eidos_rng_interval_uint32(rng_32, candidate_count) + first_candidate_index; if ((excluded_index != -1) && (sample_index >= excluded_index)) sample_index++; @@ -11873,12 +11874,12 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_INDIVIDUALS_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size, index_buffer) firstprivate(candidate_count, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_INDIVIDUALS_2) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int rose_index = (int)Eidos_rng_uniform_int(rng, (uint32_t)candidate_count); + int rose_index = (int)Eidos_rng_interval_uint32(rng_32, (uint32_t)candidate_count); object_result_data[samples_generated] = parent_individuals_[index_buffer[rose_index]]; } @@ -11898,7 +11899,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID else { // base case without replacement; this is not parallelized because of contention over index_buffer removals - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { @@ -11908,7 +11909,7 @@ EidosValue_SP Subpopulation::ExecuteMethod_sampleIndividuals(EidosGlobalStringID EIDOS_TERMINATION << "ERROR 
(Subpopulation::ExecuteMethod_sampleIndividuals): (internal error) sampleIndividuals() ran out of eligible individuals from which to sample." << EidosTerminate(nullptr); // CODE COVERAGE: This is dead code #endif - int rose_index = (int)Eidos_rng_uniform_int(rng, (uint32_t)candidate_count); + int rose_index = (int)Eidos_rng_interval_uint32(rng_32, (uint32_t)candidate_count); result->set_object_element_no_check_NORR(parent_individuals_[index_buffer[rose_index]], samples_generated); diff --git a/core/subpopulation.h b/core/subpopulation.h index f481800ff..0b4ace59d 100644 --- a/core/subpopulation.h +++ b/core/subpopulation.h @@ -246,13 +246,13 @@ class Subpopulation : public EidosDictionaryUnretained return has_null_haplosomes_; } - slim_popsize_t DrawParentUsingFitness(gsl_rng *rng) const; // WF only: draw an individual from the subpopulation based upon fitness - slim_popsize_t DrawFemaleParentUsingFitness(gsl_rng *rng) const; // WF only: draw a female from the subpopulation based upon fitness; SEX ONLY - slim_popsize_t DrawMaleParentUsingFitness(gsl_rng *rng) const; // WF only: draw a male from the subpopulation based upon fitness; SEX ONLY + slim_popsize_t DrawParentUsingFitness(Eidos_RNG_State *rng_state) const; // WF only: draw an individual from the subpopulation based upon fitness + slim_popsize_t DrawFemaleParentUsingFitness(Eidos_RNG_State *rng_state) const; // WF only: draw a female from the subpopulation based upon fitness; SEX ONLY + slim_popsize_t DrawMaleParentUsingFitness(Eidos_RNG_State *rng_state) const; // WF only: draw a male from the subpopulation based upon fitness; SEX ONLY - slim_popsize_t DrawParentEqualProbability(gsl_rng *rng) const; // draw an individual from the subpopulation with equal probabilities - slim_popsize_t DrawFemaleParentEqualProbability(gsl_rng *rng) const; // draw a female from the subpopulation with equal probabilities; SEX ONLY - slim_popsize_t DrawMaleParentEqualProbability(gsl_rng *rng) const; // draw a male from the 
subpopulation with equal probabilities; SEX ONLY + slim_popsize_t DrawParentEqualProbability(EidosRNG_32_bit &rng_32) const; // draw an individual from the subpopulation with equal probabilities + slim_popsize_t DrawFemaleParentEqualProbability(EidosRNG_32_bit &rng_32) const; // draw a female from the subpopulation with equal probabilities; SEX ONLY + slim_popsize_t DrawMaleParentEqualProbability(EidosRNG_32_bit &rng_32) const; // draw a male from the subpopulation with equal probabilities; SEX ONLY inline __attribute__((always_inline)) Individual *NewSubpopIndividual(slim_popsize_t p_individual_index, IndividualSex p_sex, slim_age_t p_age, double p_fitness, float p_mean_parent_age) { @@ -502,7 +502,7 @@ class Subpopulation : public EidosDictionaryUnretained }; -inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawParentUsingFitness(gsl_rng *rng) const +inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawParentUsingFitness(Eidos_RNG_State *rng_state) const { #if DEBUG if (sex_enabled_) @@ -510,23 +510,23 @@ inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawParentUs #endif if (lookup_parent_) - return static_cast(gsl_ran_discrete(rng, lookup_parent_)); + return static_cast(gsl_ran_discrete(&rng_state->gsl_rng_, lookup_parent_)); else - return static_cast(Eidos_rng_uniform_int(rng, parent_subpop_size_)); + return static_cast(Eidos_rng_interval_uint32(rng_state->pcg32_rng_, parent_subpop_size_)); } -inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawParentEqualProbability(gsl_rng *rng) const +inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawParentEqualProbability(EidosRNG_32_bit &rng_32) const { #if DEBUG if (sex_enabled_) EIDOS_TERMINATION << "ERROR (Subpopulation::DrawParentEqualProbability): (internal error) called on a population for which sex is enabled." 
<< EidosTerminate(); #endif - return static_cast(Eidos_rng_uniform_int(rng, parent_subpop_size_)); + return static_cast(Eidos_rng_interval_uint32(rng_32, parent_subpop_size_)); } // SEX ONLY -inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawFemaleParentUsingFitness(gsl_rng *rng) const +inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawFemaleParentUsingFitness(Eidos_RNG_State *rng_state) const { #if DEBUG if (!sex_enabled_) @@ -534,24 +534,24 @@ inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawFemalePa #endif if (lookup_female_parent_) - return static_cast(gsl_ran_discrete(rng, lookup_female_parent_)); + return static_cast(gsl_ran_discrete(&rng_state->gsl_rng_, lookup_female_parent_)); else - return static_cast(Eidos_rng_uniform_int(rng, parent_first_male_index_)); + return static_cast(Eidos_rng_interval_uint32(rng_state->pcg32_rng_, parent_first_male_index_)); } // SEX ONLY -inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawFemaleParentEqualProbability(gsl_rng *rng) const +inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawFemaleParentEqualProbability(EidosRNG_32_bit &rng_32) const { #if DEBUG if (!sex_enabled_) EIDOS_TERMINATION << "ERROR (Subpopulation::DrawFemaleParentEqualProbability): (internal error) called on a population for which sex is not enabled." 
<< EidosTerminate(); #endif - return static_cast(Eidos_rng_uniform_int(rng, parent_first_male_index_)); + return static_cast(Eidos_rng_interval_uint32(rng_32, parent_first_male_index_)); } // SEX ONLY -inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawMaleParentUsingFitness(gsl_rng *rng) const +inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawMaleParentUsingFitness(Eidos_RNG_State *rng_state) const { #if DEBUG if (!sex_enabled_) @@ -559,20 +559,20 @@ inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawMalePare #endif if (lookup_male_parent_) - return static_cast(gsl_ran_discrete(rng, lookup_male_parent_)) + parent_first_male_index_; + return static_cast(gsl_ran_discrete(&rng_state->gsl_rng_, lookup_male_parent_)) + parent_first_male_index_; else - return static_cast(Eidos_rng_uniform_int(rng, parent_subpop_size_ - parent_first_male_index_) + parent_first_male_index_); + return static_cast(Eidos_rng_interval_uint32(rng_state->pcg32_rng_, parent_subpop_size_ - parent_first_male_index_) + parent_first_male_index_); } // SEX ONLY -inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawMaleParentEqualProbability(gsl_rng *rng) const +inline __attribute__((always_inline)) slim_popsize_t Subpopulation::DrawMaleParentEqualProbability(EidosRNG_32_bit &rng_32) const { #if DEBUG if (!sex_enabled_) EIDOS_TERMINATION << "ERROR (Subpopulation::DrawMaleParentEqualProbability): (internal error) called on a population for which sex is not enabled." 
<< EidosTerminate(); #endif - return static_cast(Eidos_rng_uniform_int(rng, parent_subpop_size_ - parent_first_male_index_) + parent_first_male_index_); + return static_cast(Eidos_rng_interval_uint32(rng_32, parent_subpop_size_ - parent_first_male_index_) + parent_first_male_index_); } class Subpopulation_Class : public EidosDictionaryUnretained_Class diff --git a/eidos/eidos_functions_distributions.cpp b/eidos/eidos_functions_distributions.cpp index 3baad7297..d83a2d371 100644 --- a/eidos/eidos_functions_distributions.cpp +++ b/eidos/eidos_functions_distributions.cpp @@ -726,7 +726,7 @@ EidosValue_SP Eidos_ExecuteFunction_rbeta(const std::vector &p_ar double alpha0 = (arg_alpha_count ? arg_alpha->NumericAtIndex_NOCAST(0, nullptr) : 0.0); double beta0 = (arg_beta_count ? arg_beta->NumericAtIndex_NOCAST(0, nullptr) : 0.0); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (alpha_singleton && beta_singleton) { @@ -739,7 +739,7 @@ EidosValue_SP Eidos_ExecuteFunction_rbeta(const std::vector &p_ar result_SP = EidosValue_SP(float_result); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_beta(rng, alpha0, beta0), draw_index); + float_result->set_float_no_check(gsl_ran_beta(rng_gsl, alpha0, beta0), draw_index); } else { @@ -756,7 +756,7 @@ EidosValue_SP Eidos_ExecuteFunction_rbeta(const std::vector &p_ar if (beta <= 0.0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rbeta): function rbeta() requires beta > 0.0 (" << EidosStringForFloat(beta) << " supplied)." 
<< EidosTerminate(nullptr); - float_result->set_float_no_check(gsl_ran_beta(rng, alpha, beta), draw_index); + float_result->set_float_no_check(gsl_ran_beta(rng_gsl, alpha, beta), draw_index); } } @@ -820,11 +820,11 @@ EidosValue_SP Eidos_ExecuteFunction_rbinom(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_RBINOM_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, probability0, size0) if(num_draws >= EIDOS_OMPMIN_RBINOM_2) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1024) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - int_result->set_int_no_check(gsl_ran_binomial(rng, probability0, size0), draw_index); + int_result->set_int_no_check(gsl_ran_binomial(rng_gsl, probability0, size0), draw_index); } } } @@ -838,7 +838,7 @@ EidosValue_SP Eidos_ExecuteFunction_rbinom(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_RBINOM_3); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, size_singleton, prob_singleton, size0, probability0, size_data, prob_data) reduction(||: saw_error1) reduction(||: saw_error2) if(num_draws >= EIDOS_OMPMIN_RBINOM_3) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1024) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) @@ -857,7 +857,7 @@ EidosValue_SP Eidos_ExecuteFunction_rbinom(const std::vector &p_a continue; } - int_result->set_int_no_check(gsl_ran_binomial(rng, probability, size), draw_index); + int_result->set_int_no_check(gsl_ran_binomial(rng_gsl, probability, size), draw_index); } } @@ -895,7 +895,7 @@ EidosValue_SP Eidos_ExecuteFunction_rcauchy(const std::vector &p_ double location0 = (arg_location_count ? 
arg_location->NumericAtIndex_NOCAST(0, nullptr) : 0.0); double scale0 = (arg_scale_count ? arg_scale->NumericAtIndex_NOCAST(0, nullptr) : 1.0); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (location_singleton && scale_singleton) { @@ -906,7 +906,7 @@ EidosValue_SP Eidos_ExecuteFunction_rcauchy(const std::vector &p_ result_SP = EidosValue_SP(float_result); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_cauchy(rng, scale0) + location0, draw_index); + float_result->set_float_no_check(gsl_ran_cauchy(rng_gsl, scale0) + location0, draw_index); } else { @@ -921,7 +921,7 @@ EidosValue_SP Eidos_ExecuteFunction_rcauchy(const std::vector &p_ if (scale <= 0.0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rcauchy): function rcauchy() requires scale > 0.0 (" << EidosStringForFloat(scale) << " supplied)." << EidosTerminate(nullptr); - float_result->set_float_no_check(gsl_ran_cauchy(rng, scale) + location, draw_index); + float_result->set_float_no_check(gsl_ran_cauchy(rng_gsl, scale) + location, draw_index); } } @@ -984,11 +984,11 @@ EidosValue_SP Eidos_ExecuteFunction_rdunif(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_RDUNIF_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_value0, count0) if(num_draws >= EIDOS_OMPMIN_RDUNIF_2) num_threads(thread_count) { - Eidos_MT_State *mt = EIDOS_MT_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1024) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - int_result->set_int_no_check(Eidos_rng_uniform_int_MT64(mt, count0) + min_value0, draw_index); + int_result->set_int_no_check(Eidos_rng_interval_uint64(rng_64, count0) + min_value0, draw_index); } } } @@ -1002,7 +1002,7 @@ EidosValue_SP Eidos_ExecuteFunction_rdunif(const 
std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_RDUNIF_3); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, min_singleton, max_singleton, min_value0, max_value0, min_data, max_data) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RDUNIF_3) num_threads(thread_count) { - Eidos_MT_State *mt = EIDOS_MT_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1024) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) @@ -1017,7 +1017,7 @@ EidosValue_SP Eidos_ExecuteFunction_rdunif(const std::vector &p_a continue; } - int_result->set_int_no_check(Eidos_rng_uniform_int_MT64(mt, count) + min_value, draw_index); + int_result->set_int_no_check(Eidos_rng_interval_uint64(rng_64, count) + min_value, draw_index); } } @@ -1098,11 +1098,11 @@ EidosValue_SP Eidos_ExecuteFunction_rexp(const std::vector &p_arg EIDOS_THREAD_COUNT(gEidos_OMP_threads_REXP_1); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, mu0) if(num_draws >= EIDOS_OMPMIN_REXP_1) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_exponential(rng, mu0), draw_index); + float_result->set_float_no_check(gsl_ran_exponential(rng_gsl, mu0), draw_index); } } else @@ -1113,14 +1113,14 @@ EidosValue_SP Eidos_ExecuteFunction_rexp(const std::vector &p_arg EIDOS_THREAD_COUNT(gEidos_OMP_threads_REXP_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, arg_mu) if(num_draws >= EIDOS_OMPMIN_REXP_2) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma 
omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) { double mu = arg_mu->NumericAtIndex_NOCAST((int)draw_index, nullptr); - float_result->set_float_no_check(gsl_ran_exponential(rng, mu), draw_index); + float_result->set_float_no_check(gsl_ran_exponential(rng_gsl, mu), draw_index); } } } @@ -1153,7 +1153,7 @@ EidosValue_SP Eidos_ExecuteFunction_rf(const std::vector &p_argum double d1_0 = (arg_d1_count ? arg_d1->NumericAtIndex_NOCAST(0, nullptr) : 0.0); double d2_0 = (arg_d2_count ? arg_d2->NumericAtIndex_NOCAST(0, nullptr) : 0.0); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (d1_singleton && d2_singleton) { @@ -1166,7 +1166,7 @@ EidosValue_SP Eidos_ExecuteFunction_rf(const std::vector &p_argum result_SP = EidosValue_SP(float_result); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_fdist(rng, d1_0, d2_0), draw_index); + float_result->set_float_no_check(gsl_ran_fdist(rng_gsl, d1_0, d2_0), draw_index); } else { @@ -1183,7 +1183,7 @@ EidosValue_SP Eidos_ExecuteFunction_rf(const std::vector &p_argum if (d2 <= 0.0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rf): function rf() requires d2 > 0.0 (" << EidosStringForFloat(d2) << " supplied)." << EidosTerminate(nullptr); - float_result->set_float_no_check(gsl_ran_fdist(rng, d1, d2), draw_index); + float_result->set_float_no_check(gsl_ran_fdist(rng_gsl, d1, d2), draw_index); } } @@ -1274,7 +1274,7 @@ EidosValue_SP Eidos_ExecuteFunction_rgamma(const std::vector &p_a double mean0 = (arg_mean_count ? arg_mean->NumericAtIndex_NOCAST(0, nullptr) : 1.0); double shape0 = (arg_shape_count ? 
arg_shape->NumericAtIndex_NOCAST(0, nullptr) : 0.0); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (mean_singleton && shape_singleton) { @@ -1287,7 +1287,7 @@ EidosValue_SP Eidos_ExecuteFunction_rgamma(const std::vector &p_a double scale = mean0 / shape0; for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_gamma(rng, shape0, scale), draw_index); + float_result->set_float_no_check(gsl_ran_gamma(rng_gsl, shape0, scale), draw_index); } else { @@ -1302,7 +1302,7 @@ EidosValue_SP Eidos_ExecuteFunction_rgamma(const std::vector &p_a if (shape <= 0.0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rgamma): function rgamma() requires shape > 0.0 (" << EidosStringForFloat(shape) << " supplied)." << EidosTerminate(nullptr); - float_result->set_float_no_check(gsl_ran_gamma(rng, shape, mean / shape), draw_index); + float_result->set_float_no_check(gsl_ran_gamma(rng_gsl, shape, mean / shape), draw_index); } } @@ -1328,7 +1328,7 @@ EidosValue_SP Eidos_ExecuteFunction_rgeom(const std::vector &p_ar if (!p_singleton && (arg_p_count != num_draws)) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rgeom): function rgeom() requires p to be of length 1 or n." << EidosTerminate(nullptr); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); // Note that there are two different definitions of the geometric distribution (see https://en.wikipedia.org/wiki/Geometric_distribution). // We follow R in using the definition that is supported on the set {0, 1, 2, 3, ...}. 
Unfortunately, gsl_ran_geometric() uses the other @@ -1350,7 +1350,7 @@ EidosValue_SP Eidos_ExecuteFunction_rgeom(const std::vector &p_ar int_result->set_int_no_check(0, draw_index); else for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - int_result->set_int_no_check(gsl_ran_geometric(rng, p0) - 1, draw_index); + int_result->set_int_no_check(gsl_ran_geometric(rng_gsl, p0) - 1, draw_index); } else { @@ -1372,7 +1372,7 @@ EidosValue_SP Eidos_ExecuteFunction_rgeom(const std::vector &p_ar EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rgeom): function rgeom() requires 0.0 < p <= 1.0 (" << EidosStringForFloat(p) << " supplied)." << EidosTerminate(nullptr); } - int_result->set_int_no_check(gsl_ran_geometric(rng, p) - 1, draw_index); + int_result->set_int_no_check(gsl_ran_geometric(rng_gsl, p) - 1, draw_index); } } @@ -1404,7 +1404,7 @@ EidosValue_SP Eidos_ExecuteFunction_rlnorm(const std::vector &p_a double meanlog0 = (arg_meanlog_count ? arg_meanlog->NumericAtIndex_NOCAST(0, nullptr) : 0.0); double sdlog0 = (arg_sdlog_count ? arg_sdlog->NumericAtIndex_NOCAST(0, nullptr) : 1.0); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (meanlog_singleton && sdlog_singleton) { @@ -1412,7 +1412,7 @@ EidosValue_SP Eidos_ExecuteFunction_rlnorm(const std::vector &p_a result_SP = EidosValue_SP(float_result); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_lognormal(rng, meanlog0, sdlog0), draw_index); + float_result->set_float_no_check(gsl_ran_lognormal(rng_gsl, meanlog0, sdlog0), draw_index); } else { @@ -1424,7 +1424,7 @@ EidosValue_SP Eidos_ExecuteFunction_rlnorm(const std::vector &p_a double meanlog = (meanlog_singleton ? meanlog0 : arg_meanlog->NumericAtIndex_NOCAST(draw_index, nullptr)); double sdlog = (sdlog_singleton ? 
sdlog0 : arg_sdlog->NumericAtIndex_NOCAST(draw_index, nullptr)); - float_result->set_float_no_check(gsl_ran_lognormal(rng, meanlog, sdlog), draw_index); + float_result->set_float_no_check(gsl_ran_lognormal(rng_gsl, meanlog, sdlog), draw_index); } } @@ -1509,11 +1509,11 @@ EidosValue_SP Eidos_ExecuteFunction_rmvnorm(const std::vector &p_ EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(num_draws * d); result_SP = EidosValue_SP(float_result); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) { - gsl_err = gsl_ran_multivariate_gaussian(rng, gsl_mu, gsl_L, gsl_result); + gsl_err = gsl_ran_multivariate_gaussian(rng_gsl, gsl_mu, gsl_L, gsl_result); if (gsl_err) { @@ -1569,7 +1569,7 @@ EidosValue_SP Eidos_ExecuteFunction_rnbinom(const std::vector &p_ const double *prob_data = arg_prob->FloatData(); double probability0 = prob_data[0]; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (size_singleton && prob_singleton) { @@ -1582,7 +1582,7 @@ EidosValue_SP Eidos_ExecuteFunction_rnbinom(const std::vector &p_ result_SP = EidosValue_SP(int_result); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - int_result->set_int_no_check(gsl_ran_negative_binomial(rng, probability0, size0), draw_index); + int_result->set_int_no_check(gsl_ran_negative_binomial(rng_gsl, probability0, size0), draw_index); } else { @@ -1599,7 +1599,7 @@ EidosValue_SP Eidos_ExecuteFunction_rnbinom(const std::vector &p_ if ((probability <= 0.0) || (probability > 1.0) || std::isnan(probability)) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rnbinom): function rnbinom() requires probability in (0.0, 1.0] (" << EidosStringForFloat(probability) << " supplied)." 
<< EidosTerminate(nullptr); - int_result->set_int_no_check(gsl_ran_negative_binomial(rng, probability, size), draw_index); + int_result->set_int_no_check(gsl_ran_negative_binomial(rng_gsl, probability, size), draw_index); } } @@ -1637,9 +1637,9 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar if (num_draws == 1) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); - return EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float(gsl_ran_gaussian(rng, sigma0) + mu0)); + return EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float(gsl_ran_gaussian(rng_gsl, sigma0) + mu0)); } EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(num_draws); @@ -1650,11 +1650,11 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RNORM_1); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, sigma0, mu0) if(num_draws >= EIDOS_OMPMIN_RNORM_1) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_gaussian(rng, sigma0) + mu0, draw_index); + float_result->set_float_no_check(gsl_ran_gaussian(rng_gsl, sigma0) + mu0, draw_index); } } else if (sigma_singleton) // && !mu_singleton @@ -1662,14 +1662,14 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RNORM_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, sigma0, arg_mu) if(num_draws >= EIDOS_OMPMIN_RNORM_2) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = 
EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) { double mu = arg_mu->NumericAtIndex_NOCAST((int)draw_index, nullptr); - float_result->set_float_no_check(gsl_ran_gaussian(rng, sigma0) + mu, draw_index); + float_result->set_float_no_check(gsl_ran_gaussian(rng_gsl, sigma0) + mu, draw_index); } } } @@ -1680,7 +1680,7 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RNORM_3); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, mu_singleton, mu0, arg_mu, arg_sigma) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RNORM_3) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) @@ -1694,7 +1694,7 @@ EidosValue_SP Eidos_ExecuteFunction_rnorm(const std::vector &p_ar continue; } - float_result->set_float_no_check(gsl_ran_gaussian(rng, sigma) + mu, draw_index); + float_result->set_float_no_check(gsl_ran_gaussian(rng_gsl, sigma) + mu, draw_index); } } @@ -1740,11 +1740,11 @@ EidosValue_SP Eidos_ExecuteFunction_rpois(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RPOIS_1); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, lambda0) if(num_draws >= EIDOS_OMPMIN_RPOIS_1) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - int_result->set_int_no_check(gsl_ran_poisson(rng, lambda0), draw_index); + int_result->set_int_no_check(gsl_ran_poisson(rng_gsl, lambda0), draw_index); } } else @@ -1757,7 +1757,7 @@ EidosValue_SP 
Eidos_ExecuteFunction_rpois(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RPOIS_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(int_result, arg_lambda) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RPOIS_2) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(dynamic, 1024) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) @@ -1770,7 +1770,7 @@ EidosValue_SP Eidos_ExecuteFunction_rpois(const std::vector &p_ar continue; } - int_result->set_int_no_check(gsl_ran_poisson(rng, lambda), draw_index); + int_result->set_int_no_check(gsl_ran_poisson(rng_gsl, lambda), draw_index); } } @@ -1816,11 +1816,11 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RUNIF_1); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws, std::cout) firstprivate(float_result) if(num_draws >= EIDOS_OMPMIN_RUNIF_1) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(Eidos_rng_uniform(rng), draw_index); + float_result->set_float_no_check(Eidos_rng_uniform_doubleCO(rng_64), draw_index); } } else @@ -1838,11 +1838,11 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RUNIF_2); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, range0, min_value0) if(num_draws >= EIDOS_OMPMIN_RUNIF_2) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t 
draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(Eidos_rng_uniform(rng) * range0 + min_value0, draw_index); + float_result->set_float_no_check(Eidos_rng_uniform_doubleCO(rng_64) * range0 + min_value0, draw_index); } } else @@ -1855,7 +1855,7 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar EIDOS_THREAD_COUNT(gEidos_OMP_threads_RUNIF_3); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, num_draws) firstprivate(float_result, min_singleton, max_singleton, min_value0, max_value0, arg_min, arg_max) reduction(||: saw_error) if(num_draws >= EIDOS_OMPMIN_RUNIF_3) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) @@ -1870,7 +1870,7 @@ EidosValue_SP Eidos_ExecuteFunction_runif(const std::vector &p_ar continue; } - float_result->set_float_no_check(Eidos_rng_uniform(rng) * range + min_value, draw_index); + float_result->set_float_no_check(Eidos_rng_uniform_doubleCO(rng_64) * range + min_value, draw_index); } } @@ -1907,7 +1907,7 @@ EidosValue_SP Eidos_ExecuteFunction_rweibull(const std::vector &p double lambda0 = (arg_lambda_count ? arg_lambda->NumericAtIndex_NOCAST(0, nullptr) : 0.0); double k0 = (arg_k_count ? 
arg_k->NumericAtIndex_NOCAST(0, nullptr) : 0.0); - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); if (lambda_singleton && k_singleton) { @@ -1920,7 +1920,7 @@ EidosValue_SP Eidos_ExecuteFunction_rweibull(const std::vector &p result_SP = EidosValue_SP(float_result); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - float_result->set_float_no_check(gsl_ran_weibull(rng, lambda0, k0), draw_index); + float_result->set_float_no_check(gsl_ran_weibull(rng_gsl, lambda0, k0), draw_index); } else { @@ -1937,7 +1937,7 @@ EidosValue_SP Eidos_ExecuteFunction_rweibull(const std::vector &p if (k <= 0.0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_rweibull): function rweibull() requires k > 0.0 (" << EidosStringForFloat(k) << " supplied)." << EidosTerminate(nullptr); - float_result->set_float_no_check(gsl_ran_weibull(rng, lambda, k), draw_index); + float_result->set_float_no_check(gsl_ran_weibull(rng_gsl, lambda, k), draw_index); } } @@ -1981,10 +1981,10 @@ EidosValue_SP Eidos_ExecuteFunction_rztpois(const std::vector &p_ // FIXME PARALLELIZE THIS { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) - int_result->set_int_no_check(Eidos_FastRandomPoisson_NONZERO(rng, lambda0, exp_neg_lambda), draw_index); + int_result->set_int_no_check(Eidos_FastRandomPoisson_NONZERO(rng_state, lambda0, exp_neg_lambda), draw_index); } } else @@ -1996,7 +1996,7 @@ EidosValue_SP Eidos_ExecuteFunction_rztpois(const std::vector &p_ // FIXME PARALLELIZE THIS { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); for (int64_t draw_index = 0; draw_index < num_draws; ++draw_index) { @@ -2008,7 +2008,7 @@ EidosValue_SP Eidos_ExecuteFunction_rztpois(const std::vector &p_ continue; } - 
int_result->set_int_no_check(Eidos_FastRandomPoisson_NONZERO(rng, lambda, exp(-lambda)), draw_index); + int_result->set_int_no_check(Eidos_FastRandomPoisson_NONZERO(rng_state, lambda, exp(-lambda)), draw_index); } } diff --git a/eidos/eidos_functions_values.cpp b/eidos/eidos_functions_values.cpp index b8d645492..15b1bde43 100644 --- a/eidos/eidos_functions_values.cpp +++ b/eidos/eidos_functions_values.cpp @@ -324,7 +324,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a // and is handled below, because gsl_ran_shuffle() can't move std::string safely if (!weights_value && !replace && (sample_size == x_count) && (sample_size != 1) && (x_type != EidosValueType::kValueString)) { - gsl_rng *main_thread_rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &main_thread_rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); result_SP = x_value->CopyValues(); EidosValue *result = result_SP.get(); @@ -334,16 +334,16 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a case EidosValueType::kValueVOID: break; // NOLINT(*-branch-clone) : intentional consecutive branches case EidosValueType::kValueNULL: break; case EidosValueType::kValueLogical: - Eidos_ran_shuffle(main_thread_rng, result->LogicalData_Mutable(), x_count); + Eidos_ran_shuffle_uint32(main_thread_rng_32, result->LogicalData_Mutable(), x_count); break; case EidosValueType::kValueInt: - Eidos_ran_shuffle(main_thread_rng, result->IntData_Mutable(), x_count); + Eidos_ran_shuffle_uint32(main_thread_rng_32, result->IntData_Mutable(), x_count); break; case EidosValueType::kValueFloat: - Eidos_ran_shuffle(main_thread_rng, result->FloatData_Mutable(), x_count); + Eidos_ran_shuffle_uint32(main_thread_rng_32, result->FloatData_Mutable(), x_count); break; case EidosValueType::kValueObject: - Eidos_ran_shuffle(main_thread_rng, result->ObjectData_Mutable(), x_count); + Eidos_ran_shuffle_uint32(main_thread_rng_32, result->ObjectData_Mutable(), x_count); break; default: EIDOS_TERMINATION << 
"ERROR (Eidos_ExecuteFunction_sample): (internal error) unsupported type in sample()" << EidosTerminate(nullptr); @@ -386,8 +386,6 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a // the algorithm used depends on whether weights were supplied if (weights_value) { - gsl_rng *main_thread_rng = EIDOS_GSL_RNG(omp_get_thread_num()); - if (replace && ((x_count > 100) || (sample_size > 100)) && (sample_size > 1)) { // a large sampling task with replacement and weights goes through an optimized code path here @@ -456,12 +454,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_WR_INT); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, int_data, int_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_INT) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int rose_index = (int)gsl_ran_discrete(rng, discrete_draw); + int rose_index = (int)gsl_ran_discrete(rng_gsl, discrete_draw); int_result_data[samples_generated] = int_data[rose_index]; } @@ -477,12 +475,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_WR_FLOAT); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, float_data, float_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_FLOAT) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int rose_index = (int)gsl_ran_discrete(rng, discrete_draw); + int rose_index = 
(int)gsl_ran_discrete(rng_gsl, discrete_draw); float_result_data[samples_generated] = float_data[rose_index]; } @@ -499,12 +497,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_WR_OBJECT); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(discrete_draw, object_data, object_result_data) if(sample_size >= EIDOS_OMPMIN_SAMPLE_WR_OBJECT) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int rose_index = (int)gsl_ran_discrete(rng, discrete_draw); + int rose_index = (int)gsl_ran_discrete(rng_gsl, discrete_draw); EidosObject *object_element = object_data[rose_index]; object_result_data[samples_generated] = object_element; @@ -524,6 +522,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a else { // This handles the logical and string cases + gsl_rng *main_thread_rng = EIDOS_GSL_RNG(omp_get_thread_num()); result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); @@ -562,7 +561,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a if (sample_size == 1) { // a sample size of 1 is very common; make it as fast as we can by getting a singleton EidosValue directly from x - double rose = Eidos_rng_uniform(main_thread_rng) * weights_sum; + EidosRNG_64_bit &main_thread_rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + double rose = Eidos_rng_uniform_doubleCO(main_thread_rng_64) * weights_sum; double rose_sum = 0.0; int rose_index; @@ -580,12 +580,13 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a { // with replacement, we can just do a series of independent draws // (note the large-task case is handled with the GSL above) + EidosRNG_64_bit &main_thread_rng_64 = 
EIDOS_64BIT_RNG(omp_get_thread_num()); result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - double rose = Eidos_rng_uniform(main_thread_rng) * weights_sum; + double rose = Eidos_rng_uniform_doubleCO(main_thread_rng_64) * weights_sum; double rose_sum = 0.0; int rose_index; @@ -603,6 +604,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a else { // without replacement, we remove each item after it is drawn, so brute force seems like the only way + EidosRNG_64_bit &main_thread_rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); @@ -614,7 +616,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a if (weights_sum <= 0.0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_sample): function sample() encountered weights summing to <= 0." << EidosTerminate(nullptr); - double rose = Eidos_rng_uniform(main_thread_rng) * weights_sum; + double rose = Eidos_rng_uniform_doubleCO(main_thread_rng_64) * weights_sum; double rose_sum = 0.0; int rose_index; @@ -660,7 +662,8 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a if (sample_size == 1) { // a sample size of 1 is very common; make it as fast as we can by getting a singleton EidosValue directly from x - int64_t rose = (int64_t)ceil(Eidos_rng_uniform(main_thread_rng) * weights_sum); + EidosRNG_64_bit &main_thread_rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); + int64_t rose = (int64_t)ceil(Eidos_rng_uniform_doubleCO(main_thread_rng_64) * weights_sum); int64_t rose_sum = 0; int rose_index; @@ -678,12 +681,13 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a { // with replacement, we can just do a series of independent draws // (note the large-task case is handled with the GSL above) + EidosRNG_64_bit &main_thread_rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); 
result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int64_t rose = (int64_t)ceil(Eidos_rng_uniform(main_thread_rng) * weights_sum); + int64_t rose = (int64_t)ceil(Eidos_rng_uniform_doubleCO(main_thread_rng_64) * weights_sum); int64_t rose_sum = 0; int rose_index; @@ -701,6 +705,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a else { // without replacement, we remove each item after it is drawn, so brute force seems like the only way + EidosRNG_64_bit &main_thread_rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); @@ -712,7 +717,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a if (weights_sum <= 0) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_sample): function sample() encountered weights summing to <= 0." << EidosTerminate(nullptr); - int64_t rose = (int64_t)ceil(Eidos_rng_uniform(main_thread_rng) * weights_sum); + int64_t rose = (int64_t)ceil(Eidos_rng_uniform_doubleCO(main_thread_rng_64) * weights_sum); int64_t rose_sum = 0; int rose_index; @@ -746,9 +751,9 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a if (sample_size == 1) { // a sample size of 1 is very common; make it as fast as we can by getting a singleton EidosValue directly from x - gsl_rng *main_thread_rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &main_thread_rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); - return x_value->GetValueAtIndex((int)Eidos_rng_uniform_int(main_thread_rng, x_count), nullptr); + return x_value->GetValueAtIndex((int)Eidos_rng_interval_uint32(main_thread_rng_32, x_count), nullptr); } else if (replace) { @@ -773,12 +778,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_R_INT); #pragma omp parallel default(none) 
shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(int_data, int_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_INT) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int32_t sample = Eidos_rng_uniform_int(rng, x_count); + int32_t sample = Eidos_rng_interval_uint32(rng_32, x_count); int_result_data[samples_generated] = int_data[sample]; } } @@ -793,12 +798,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_R_FLOAT); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(float_data, float_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_FLOAT) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int32_t sample = Eidos_rng_uniform_int(rng, x_count); + int32_t sample = Eidos_rng_interval_uint32(rng_32, x_count); float_result_data[samples_generated] = float_data[sample]; } } @@ -814,12 +819,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a EIDOS_THREAD_COUNT(gEidos_OMP_threads_SAMPLE_R_OBJECT); #pragma omp parallel default(none) shared(gEidos_RNG_PERTHREAD, sample_size) firstprivate(object_data, object_result_data, x_count) if(sample_size >= EIDOS_OMPMIN_SAMPLE_R_OBJECT) num_threads(thread_count) { - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); + EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); #pragma omp for schedule(static) nowait for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int32_t sample 
= Eidos_rng_uniform_int(rng, x_count); + int32_t sample = Eidos_rng_interval_uint32(rng_32, x_count); EidosObject *object_element = object_data[sample]; object_result_data[samples_generated] = object_element; } @@ -838,13 +843,12 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a else { // This handles the logical and string cases - gsl_rng *main_thread_rng = EIDOS_GSL_RNG(omp_get_thread_num()); - + EidosRNG_32_bit &main_thread_rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) - result->PushValueFromIndexOfEidosValue((int)Eidos_rng_uniform_int(main_thread_rng, x_count), *x_value, nullptr); + result->PushValueFromIndexOfEidosValue((int)Eidos_rng_interval_uint32(main_thread_rng_32, x_count), *x_value, nullptr); } } else @@ -855,8 +859,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a // gsl_ran_choose() does a gsl_rng_uniform() call for every element in x_value()! We only do one // Eidos_rng_uniform_int() call per element in sample_size, at the price of a separate index buffer // and a lack of re-entrancy and thread-safety. This is a *lot* faster for sample_size << x_count. 
- gsl_rng *main_thread_rng = EIDOS_GSL_RNG(omp_get_thread_num()); - + EidosRNG_32_bit &main_thread_rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); result_SP = x_value->NewMatchingType(); EidosValue *result = result_SP.get(); @@ -865,7 +868,7 @@ EidosValue_SP Eidos_ExecuteFunction_sample(const std::vector &p_a for (int64_t samples_generated = 0; samples_generated < sample_size; ++samples_generated) { - int rose_index = (int)Eidos_rng_uniform_int(main_thread_rng, (uint32_t)contender_count); + int rose_index = (int)Eidos_rng_interval_uint32(main_thread_rng_32, (uint32_t)contender_count); result->PushValueFromIndexOfEidosValue(index_buffer[rose_index], *x_value, nullptr); index_buffer[rose_index] = index_buffer[--contender_count]; } diff --git a/eidos/eidos_rng.cpp b/eidos/eidos_rng.cpp index 8da164893..b18d425d2 100644 --- a/eidos/eidos_rng.cpp +++ b/eidos/eidos_rng.cpp @@ -35,6 +35,47 @@ #endif +// GSL-compatible wrapper for the PCG64 generator +static void Eidos_GSL_RNG_PCG64_set(void *state, unsigned long int seed) +{ +#pragma unused(state, seed) + // BCH 11/1/2025: This should never be called, because gsl_rng_set() should never be called. The reason + // is that the pcg64_fast generator is a bit fussy about seeds; we should always seed it through the + // procedure followed in _Eidos_SetOneRNGSeed(). For that reason, we just skip seeding here. 
+#if 0 + EidosRNG_64_bit *rng_64 = static_cast(state); + + rng_64->seed(seed); +#endif +} + +static unsigned long int Eidos_GSL_RNG_PCG64_get(void *state) +{ + EidosRNG_64_bit *rng_64 = static_cast(state); + + return (*rng_64)(); +} + +static double Eidos_GSL_RNG_PCG64_get_double(void *state) +{ + EidosRNG_64_bit *rng_64 = static_cast(state); + + // generates a random double in [0,1) -- including 0 but NOT 1 + // this is a copy of Eidos_rng_uniform_doubleCO() + return ((*rng_64)() >> 11) * (1.0/9007199254740992.0); +} + +static const gsl_rng_type gEidos_GSL_RNG_PCG64 = { + "PCG64", + UINT_MAX, + 0, + sizeof(EidosRNG_64_bit), + Eidos_GSL_RNG_PCG64_set, + Eidos_GSL_RNG_PCG64_get, + Eidos_GSL_RNG_PCG64_get_double +}; + + bool gEidos_RNG_Initialized = false; #ifndef _OPENMP @@ -46,10 +87,11 @@ std::vector gEidos_RNG_PERTHREAD; static unsigned long int _Eidos_GenerateRNGSeed(void) { + unsigned long int seed; + #ifdef _WIN32 // On Windows, use the Cryptography API: Next Generation (CNG) to get a // cryptographically secure random number for use as a seed. - unsigned long int seed; // Ensure this section is thread-safe. If multiple threads request a seed // simultaneously, they must take turns to avoid corrupting system resources. @@ -65,24 +107,30 @@ static unsigned long int _Eidos_GenerateRNGSeed(void) exit(EXIT_FAILURE); } } - - return seed; #else - // on other platforms, we now use /dev/urandom as a source of seeds, which is more reliably random - // thanks to https://security.stackexchange.com/a/184211/288172 for the basis of this code - // chose urandom rather than random to avoid stalls if the random pool's entropy is low; - // semi-pseudorandom seeds should be good enough for our purposes here - unsigned long int seed; + // On other platforms, we now use /dev/urandom as a source of seeds, which is more reliably random. + // Thanks to https://security.stackexchange.com/a/184211/288172 for the basis of this code. 
+ // I chose urandom rather than random to avoid stalls if the random pool's entropy is low; + // semi-pseudorandom seeds should be good enough for our purposes here. + // Ensure this section is thread-safe. If multiple threads request a seed + // simultaneously, they must take turns to avoid corrupting system resources. #pragma omp critical (Eidos_GenerateRNGSeed) { int fd = open("/dev/urandom", O_RDONLY); (void)(read(fd, &seed, sizeof(seed)) + 1); // ignore the result without a warning, ugh; see https://stackoverflow.com/a/13999461 close(fd); } +#endif + + // Our new pcg32_fast and pcg64_fast generators have a quirk: they ignore the lowest two bits of + // the seed, and thus in _Eidos_SetOneRNGSeed() we shift the given seed left two places. We want + // to make room for that without overflow, so we shift right here. I've chosen to shift right + // by three places here, in fact, so that we have some extra headroom for the user to increment + // the generated seed a couple of times without risking overflow. + seed >>= 3; return seed; -#endif } unsigned long int Eidos_GenerateRNGSeed(void) @@ -92,6 +140,8 @@ unsigned long int Eidos_GenerateRNGSeed(void) // true; if _Eidos_GenerateRNGSeed() is generating cryptographically secure seeds, that ought // to be harmless. We do this so that the seed reported to the user always matches the seed // value generated (otherwise a discrepancy is visible in SLiMgui). + // BCH 11/1/2025: Avoiding zero was needed for the old taus2 RNG in the GSL. This is not really + // needed any more, I think; but maybe avoiding zero is good anyway? 
int64_t seed_i64; do @@ -110,16 +160,17 @@ void _Eidos_InitializeOneRNG(Eidos_RNG_State &r) // Note that this is now called from each thread, when running parallel r.rng_last_seed_ = 0; - r.gsl_rng_ = gsl_rng_alloc(gsl_rng_taus2); // the assumption of taus2 is hard-coded in eidos_rng.h + r.pcg32_rng_.seed(0); + r.pcg64_rng_.seed(0); - r.mt_rng_.mt_ = static_cast(malloc(Eidos_MT64_NN * sizeof(uint64_t))); - r.mt_rng_.mti_ = Eidos_MT64_NN + 1; // mti==NN+1 means mt[NN] is not initialized + // we do not call gsl_rng_alloc(), because our gsl_rng instance is inline; the GSL unfortunately + // does not cater to this possibility, so we've got a bit of copied init code here from the GSL + r.gsl_rng_.type = &gEidos_GSL_RNG_PCG64; + r.gsl_rng_.state = &r.pcg64_rng_; // the "state" pointer points to our 64-bit PCG generator + //gsl_rng_set(&r.gsl_rng_, 0); // the generator was already seeded above through pcg64_rng_ r.random_bool_bit_counter_ = 0; r.random_bool_bit_buffer_ = 0; - - if (!r.gsl_rng_ || !r.mt_rng_.mt_) - EIDOS_TERMINATION << "ERROR (_Eidos_InitializeOneRNG): allocation failed; you may need to raise the memory limit for SLiM." << EidosTerminate(nullptr); } void Eidos_InitializeRNG(void) @@ -165,18 +216,8 @@ void _Eidos_FreeOneRNG(Eidos_RNG_State &r) { THREAD_SAFETY_IN_ANY_PARALLEL("_Eidos_FreeOneRNG(): RNG change"); - if (r.gsl_rng_) - { - gsl_rng_free(r.gsl_rng_); - r.gsl_rng_ = NULL; - } - - if (r.mt_rng_.mt_) - { - free(r.mt_rng_.mt_); - r.mt_rng_.mt_ = NULL; - } - r.mt_rng_.mti_ = 0; + r.gsl_rng_.type = NULL; // not owned + r.gsl_rng_.state = NULL; // not owned r.random_bool_bit_buffer_ = 0; r.random_bool_bit_counter_ = 0; @@ -214,30 +255,21 @@ void _Eidos_SetOneRNGSeed(Eidos_RNG_State &r, unsigned long int p_seed) { THREAD_SAFETY_IN_ANY_PARALLEL("_Eidos_SetOneRNGSeed(): RNG change"); - // BCH 12 Sept. 2016: it turns out that gsl_rng_taus2 produces exactly the same sequence for seeds 0 and 1. 
This is obviously - // undesirable; people will often do a set of runs with sequential seeds starting at 0 and counting up, and they will get - // identical runs for 0 and 1. There is no way to re-map the seed space to get rid of the problem altogether; all we can do - // is shift it to a place where it is unlikely to cause a problem. So that's what we do. - // BCH 10/2/2025: suppressing the cppcheck warning on this; it is correct, this expression is wrong, because - // unsigned long int is 32-bit on many platforms; but it isn't worth breaking backward compatibility to clean - // this up; it's really the GSL's fault for using such a vague type to begin with, they should use uintX_t. - if ((p_seed > 0) && (p_seed < 10000000000000000000UL)) // cppcheck-suppress incorrectLogicOperator - gsl_rng_set(r.gsl_rng_, p_seed + 1); // map 1 -> 2, 2-> 3, 3-> 4, etc. - else - gsl_rng_set(r.gsl_rng_, p_seed); // 0 stays 0 - - // BCH 13 May 2018: set the seed on the MT64 generator as well; we keep them synchronized in their seeding - Eidos_MT64_init_genrand64(&r.mt_rng_, p_seed); - //std::cout << "***** Setting seed " << p_seed << " on RNG" << std::endl; - // remember the seed as part of the RNG state + // pcg32_rng_ and pcg64_rng_ need the seed to be shifted left by two; the lowest two bits don't matter + // see https://github.com/imneme/pcg-cpp/issues/79 for details on this, which seems to be a bug + r.pcg32_rng_.seed(p_seed << 2); + r.pcg64_rng_.seed(p_seed << 2); + + // we seem to need to re-point gsl_rng_ to pcg64_rng_; I don't really understand quite why... + r.gsl_rng_.state = &r.pcg64_rng_; + //gsl_rng_set(&r.gsl_rng_, p_seed); // the generator was already seeded above through pcg64_rng_ - // BCH 12 Sept. 2016: we want to return the user the same seed they requested, if they call getSeed(), so we save the requested - // seed, not the seed shifted by one that is actually passed to the GSL above. 
+ // remember the original user-supplied seed as part of the RNG state r.rng_last_seed_ = p_seed; - // These need to be zeroed out, too; they are part of our RNG state + // The random bit buffer state needs to be zeroed out, too; it is part of our RNG state r.random_bool_bit_counter_ = 0; r.random_bool_bit_buffer_ = 0; } @@ -285,104 +317,6 @@ double Eidos_FastRandomPoisson_PRECALCULATE(double p_mu) #endif -#pragma mark - -#pragma mark 64-bit MT -#pragma mark - - -// This is a 64-bit Mersenne Twister implementation. The code below is used in accordance with its license, -// reproduced in eidos_rng.h. See eidos_rng.h for further comments on this code; most of the code is there. - -/* initializes mt[NN] with a seed */ -void Eidos_MT64_init_genrand64(Eidos_MT_State *r, uint64_t seed) -{ - r->mt_[0] = seed; - for (r->mti_ = 1; r->mti_ < Eidos_MT64_NN; r->mti_++) - r->mt_[r->mti_] = (6364136223846793005ULL * (r->mt_[r->mti_ - 1] ^ (r->mt_[r->mti_ - 1] >> 62)) + r->mti_); -} - -/* initialize by an array with array-length */ -/* init_key is the array for initializing keys */ -/* key_length is its length */ -void Eidos_MT64_init_by_array64(Eidos_MT_State *r, const uint64_t init_key[], uint64_t key_length) -{ - uint64_t i, j, k; - Eidos_MT64_init_genrand64(r, 19650218ULL); - i=1; j=0; - k = (Eidos_MT64_NN>key_length ? 
Eidos_MT64_NN : key_length); - for (; k; k--) { - r->mt_[i] = (r->mt_[i] ^ ((r->mt_[i-1] ^ (r->mt_[i-1] >> 62)) * 3935559000370003845ULL)) - + init_key[j] + j; /* non linear */ - i++; j++; - if (i>=Eidos_MT64_NN) { r->mt_[0] = r->mt_[Eidos_MT64_NN-1]; i=1; } - if (j>=key_length) j=0; - } - for (k=Eidos_MT64_NN-1; k; k--) { - r->mt_[i] = (r->mt_[i] ^ ((r->mt_[i-1] ^ (r->mt_[i-1] >> 62)) * 2862933555777941757ULL)) - - i; /* non linear */ - i++; - if (i>=Eidos_MT64_NN) { r->mt_[0] = r->mt_[Eidos_MT64_NN-1]; i=1; } - } - - r->mt_[0] = 1ULL << 63; /* MSB is 1; assuring non-zero initial array */ -} - -/* BCH: fill the next Eidos_MT64_NN words; used internally by genrand64_int64() */ -void _Eidos_MT64_fill(Eidos_MT_State *r) -{ - /* generate NN words at one time */ - /* if init_genrand64() has not been called, */ - /* a default initial seed is used */ - int i; - static const uint64_t mag01[2]={0ULL, Eidos_MT64_MATRIX_A}; - uint64_t x; - - // In the original code, this would fall back to some default seed value, but we - // don't want to allow the RNG to be used without being seeded first. BCH 5/13/2018 - if (r->mti_ == Eidos_MT64_NN+1) - abort(); - - for (i=0;imt_[i]&Eidos_MT64_UM)|(r->mt_[i+1]&Eidos_MT64_LM); - r->mt_[i] = r->mt_[i+Eidos_MT64_MM] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; - } - for (;imt_[i]&Eidos_MT64_UM)|(r->mt_[i+1]&Eidos_MT64_LM); - r->mt_[i] = r->mt_[i+(Eidos_MT64_MM-Eidos_MT64_NN)] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; - } - x = (r->mt_[Eidos_MT64_NN-1]&Eidos_MT64_UM)|(r->mt_[0]&Eidos_MT64_LM); - r->mt_[Eidos_MT64_NN-1] = r->mt_[Eidos_MT64_MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; - - r->mti_ = 0; -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/eidos/eidos_rng.h b/eidos/eidos_rng.h index 06fb5b1cf..13aa0df51 100644 --- a/eidos/eidos_rng.h +++ b/eidos/eidos_rng.h @@ -19,7 +19,10 @@ /* - Eidos uses a globally shared random number generator called gEidos_RNG. This file defines that global and relevant helper functions. 
+ Eidos uses a globally shared random number generator. This file defines that global and relevant helper + functions. Internally, it actually maintains a separate 32-bit and 64-bit generator seeded with the same + value, for maximum speed when only 32 bits are needed. When running multithreaded, there is one such + generator setup per thread. */ @@ -27,59 +30,67 @@ #define __Eidos__eidos_rng__ -// We have our own private copy of (parts of) the GSL library, so that we don't have link dependencies. -// See the _README file in the gsl directory for information on the local copy of the GSL included in this project. #include "gsl_rng.h" #include "gsl_randist.h" +// BCH 1 November 2025: We now use the PCG random number generator. See https://www.pcg-random.org. This +// is licensed under the Apache License, Version 2.0, which is GPL 3.0 compatible, so we have included it in +// the project. This was downloaded from the PCG website at https://www.pcg-random.org. The original paper +// on the PCG generator is at https://www.cs.hmc.edu/tr/hmc-cs-2014-0905.pdf and can be cited as: +// +// O’Neill, M. E. (2014). PCG: A family of simple fast space-efficient statistically good algorithms for +// random number generation. Harvey Mudd College, Claremont, CA: HMC-CS-2014-0905. +// +#include "pcg_random.hpp" + #include #include #include #include "eidos_globals.h" -// OK, so. This header defines the Eidos random number generator, which is now a bit of a weird hybrid. We need to use -// the GSL's RNG for most purposes, because we want to use its random distributions and so forth. However, the taus2 -// RNG that we use from the GSL only generates samples in [0, UINT32_MAX-1], and for some applications we need samples -// in [0, UINT64_MAX-1]. For that purpose, we also have a 64-bit Mersenne Twister RNG. We keep the taus2 and MT64 -// generators synchronized, in the sense that we always seed them simultaneously with the same seed value. 
As long as -// the user makes the same draws with the same calls, the fact that there are two generators under the hood shouldn't -// matter. This struct defines the state for the Mersenne Twister RNG. -typedef struct Eidos_MT_State -{ - uint64_t *mt_ = nullptr; // buffer of Eidos_MT64_NN uint64_t - int mti_ = 0; -} Eidos_MT_State; +// OK, so. This header defines the Eidos random number generator, which is now a bit of a weird hybrid. +// We need to use the GSL's RNG for most purposes, because we want to use its random distributions and so +// forth. However, we also want access to the RNG more directly, for greater speed; and we want to be +// able to generate both 32-bit draws (for maximum speed) and 64-bit draws (for greater range/precision). +// So we keep separate 32-bit and 64-bit generators seeded in the same way, and wrap the 64-bit generator +// with a GSL generator so that we can use it through GSL calls as well. These typedefs should be used: +typedef pcg32_fast EidosRNG_32_bit; +typedef pcg64_fast EidosRNG_64_bit; // This struct defines all of the variables associated with both RNGs; this is the complete Eidos RNG state. 
typedef struct Eidos_RNG_State { unsigned long int rng_last_seed_; // unsigned long int is the type used for seeds in the GSL - // GSL taus2 generator - gsl_rng *gsl_rng_; + // We now use the pcg32_fast generator to get 32-bit random numbers for a few applications + EidosRNG_32_bit pcg32_rng_; - // MT64 generator; see below - Eidos_MT_State mt_rng_; + // We now use the pcg64_fast generator to get 64-bit random numbers for most purposes + EidosRNG_64_bit pcg64_rng_; - // random coin-flip generator; based on the MT64 generator now + // Our GSL RNG simply wraps the pcg64_rng_ generator above, and contains an UNOWNED pointer to it + gsl_rng gsl_rng_; + + // random coin-flip generator; this is based on the pcg64_rng_ generator now int random_bool_bit_counter_; uint64_t random_bool_bit_buffer_; } Eidos_RNG_State; -// This is the globally shared random number generator. Note that the globals for random bit generation below are also -// considered to be part of the RNG state; if the Context plays games with swapping different RNGs in and out, those -// globals need to get swapped as well. Likewise for the last seed value; this is part of the RNG state in Eidos. -// The 64-bit Mersenne Twister is also part of the overall global RNG state. -// BCH 11/5/2022: We now keep a single Eidos_RNG_State when running single-threaded, but when running multithreaded we -// keep one Eidos_RNG_State per thread. This allows threads to get random numbers without any locking. This means -// that each thread will have its own independent random number sequence, and makes the concept of a "seed" a bit -// nebulous; in fact, each thread's RNG will be seeded with a different value so that they do not all follow the same -// sequence. The random number sequence when running multithreaded will not be reproducible; but it really can't be, -// since multithreading will divide tasks up unpredictably and execute out of linear sequence. 
-// BCH 12/26/2022: The per-thread RNGs are now allocated separately, on the thread that will use them, so they get -// kept in the best place in memory for that thread ("first touch"). +// This is the globally shared random number generator. Note that the globals for random bit generation below +// are also considered to be part of the RNG state; if the Context plays games with swapping different RNGs in +// and out, those globals need to get swapped as well. Likewise for the last seed value; this is part of the +// RNG state in Eidos. +// BCH 11/5/2022: We now keep a single Eidos_RNG_State when running single-threaded, but when running +// multithreaded we keep one Eidos_RNG_State per thread. This allows threads to get random numbers without +// any locking. This means that each thread will have its own independent random number sequence, and makes +// the concept of a "seed" a bit nebulous; in fact, each thread's RNG will be seeded with a different value so +// that they do not all follow the same sequence. The random number sequence when running multithreaded will +// not be reproducible; but it really can't be, since multithreading will divide tasks up unpredictably and +// execute out of linear sequence. +// BCH 12/26/2022: The per-thread RNGs are now allocated separately, on the thread that will use them, so they +// get kept in the best place in memory for that thread ("first touch"). extern bool gEidos_RNG_Initialized; #ifndef _OPENMP @@ -89,22 +100,25 @@ extern std::vector gEidos_RNG_PERTHREAD; #endif // Calls to the GSL should use these macros to get the RNG state they need, whether single- or multi-threaded. -// BCH 11/5/2022: The thread number must now be supplied. It will be zero when single-threaded, and so is ignored. -// Since this is now a bit more heavyweight, the RNG for a thread should be obtained outside of any core loops. -// The most important thing is that when there is a parallel region, the RNG is obtained INSIDE that region! 
-// These can be used as follows: +// BCH 11/5/2022: The thread number must now be supplied. It will be zero when single-threaded, and so is +// ignored. Since this is now a bit more heavyweight, the RNG for a thread should be obtained outside of any +// core loops. The most important thing is that when there is a parallel region, the RNG is obtained INSIDE +// that region! These can be used as follows: // -// gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); -// Eidos_MT_State *mt = EIDOS_MT_RNG(omp_get_thread_num()); +// gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); +// EidosRNG_32_bit &rng_32 = EIDOS_32BIT_RNG(omp_get_thread_num()); +// EidosRNG_64_bit &rng_64 = EIDOS_64BIT_RNG(omp_get_thread_num()); // Eidos_RNG_State *rng_state = EIDOS_STATE_RNG(omp_get_thread_num()); // #ifndef _OPENMP -#define EIDOS_GSL_RNG(threadnum) (gEidos_RNG_SINGLE.gsl_rng_) -#define EIDOS_MT_RNG(threadnum) (&gEidos_RNG_SINGLE.mt_rng_) +#define EIDOS_GSL_RNG(threadnum) (&gEidos_RNG_SINGLE.gsl_rng_) +#define EIDOS_32BIT_RNG(threadnum) (gEidos_RNG_SINGLE.pcg32_rng_) +#define EIDOS_64BIT_RNG(threadnum) (gEidos_RNG_SINGLE.pcg64_rng_) #define EIDOS_STATE_RNG(threadnum) (&gEidos_RNG_SINGLE) #else -#define EIDOS_GSL_RNG(threadnum) (gEidos_RNG_PERTHREAD[threadnum]->gsl_rng_) -#define EIDOS_MT_RNG(threadnum) (&gEidos_RNG_PERTHREAD[threadnum]->mt_rng_) +#define EIDOS_GSL_RNG(threadnum) (&gEidos_RNG_PERTHREAD[threadnum]->gsl_rng_) +#define EIDOS_32BIT_RNG(threadnum) (gEidos_RNG_PERTHREAD[threadnum]->pcg32_rng_) +#define EIDOS_64BIT_RNG(threadnum) (gEidos_RNG_PERTHREAD[threadnum]->pcg64_rng_) #define EIDOS_STATE_RNG(threadnum) (gEidos_RNG_PERTHREAD[threadnum]) #endif @@ -118,95 +132,135 @@ extern std::vector gEidos_RNG_PERTHREAD; unsigned long int Eidos_GenerateRNGSeed(void); // set up the random number generator with a given seed -void _Eidos_InitializeOneRNG(Eidos_RNG_State &r); // only for code that needs its own local RNG -void _Eidos_FreeOneRNG(Eidos_RNG_State &r); // only for code 
that needs its own local RNG -void _Eidos_SetOneRNGSeed(Eidos_RNG_State &r, unsigned long int p_seed); // only for code that needs its own local RNG +// these first three functions are only for code that creates its own local temporary RNG +void _Eidos_InitializeOneRNG(Eidos_RNG_State &r); +void _Eidos_FreeOneRNG(Eidos_RNG_State &r); +void _Eidos_SetOneRNGSeed(Eidos_RNG_State &r, unsigned long int p_seed); +// these next three functions set up the shared RNG used by most clients, and handle separate RNGs per-thread void Eidos_InitializeRNG(void); void Eidos_FreeRNG(void); void Eidos_SetRNGSeed(unsigned long int p_seed); +// generates an unsigned 32-bit integer -- uint32_t +inline __attribute__((always_inline)) uint32_t Eidos_rng_uniform_uint32(EidosRNG_32_bit &rng_32) +{ + RNG_INIT_CHECK(); + return rng_32(); +} -// This code is copied and modified from taus.c in the GSL library because we want to be able to inline taus_get(). -// Random number generation can be a major bottleneck in many SLiM models, so I think this is worth the grossness. 
-typedef struct +// generates a signed 32-bit integer -- int32_t +inline __attribute__((always_inline)) int32_t Eidos_rng_uniform_int32(EidosRNG_32_bit &rng_32) { - unsigned long int s1, s2, s3; + RNG_INIT_CHECK(); + return static_cast(rng_32()); } -taus_state_t; -inline __attribute__((always_inline)) unsigned long -taus_get_inline (void *vstate) +// generates an unsigned 64-bit integer -- uint64_t +inline __attribute__((always_inline)) uint64_t Eidos_rng_uniform_uint64(EidosRNG_64_bit &rng_64) { RNG_INIT_CHECK(); - - taus_state_t *state = static_cast(vstate); - -#define TAUS_MASK 0xffffffffUL -#define TAUSWORTHE(s,a,b,c,d) (((s &c) <>b) - - state->s1 = TAUSWORTHE (state->s1, 13, 19, 4294967294UL, 12); - state->s2 = TAUSWORTHE (state->s2, 2, 25, 4294967288UL, 4); - state->s3 = TAUSWORTHE (state->s3, 3, 11, 4294967280UL, 17); - - return (state->s1 ^ state->s2 ^ state->s3); + return rng_64(); +} + +// generates a signed 64-bit integer -- int64_t +inline __attribute__((always_inline)) int64_t Eidos_rng_uniform_int64(EidosRNG_64_bit &rng_64) +{ + RNG_INIT_CHECK(); + return static_cast(rng_64()); } -#undef TAUS_MASK -#undef TAUSWORTHE +// generates a random double in [0,1] -- including 0 and 1 (Closed-Closed) +inline __attribute__((always_inline)) double Eidos_rng_uniform_doubleCC(EidosRNG_64_bit &rng_64) +{ + RNG_INIT_CHECK(); + return (rng_64() >> 11) * (1.0/9007199254740991.0); +} +// generates a random double in [0,1) -- including 0 but NOT 1 (Closed-Open) +inline __attribute__((always_inline)) double Eidos_rng_uniform_doubleCO(EidosRNG_64_bit &rng_64) +{ + RNG_INIT_CHECK(); + return (rng_64() >> 11) * (1.0/9007199254740992.0); +} -// The gsl_rng_uniform() function is a bit slow because of the indirection it goes through to get the -// function pointer, so this is a customized version that should be faster. Basically it just hard-codes -// taus_get(); otherwise its logic is the same. 
The taus_get_double() function called by gsl_rng_uniform() -// has the advantage of inlining the taus_get() function, but on the other hand, Eidos_rng_uniform() is -// itself inline, which gsl_rng_uniform()'s call to taus_get_double() cannot be, so that should be a wash. -inline __attribute__((always_inline)) double Eidos_rng_uniform(gsl_rng *p_r) +// generates a random double in (0,1) -- including NEITHER 0 nor 1 (Open-Open) +inline __attribute__((always_inline)) double Eidos_rng_uniform_doubleOO(EidosRNG_64_bit &rng_64) { - return taus_get_inline(p_r->state) / 4294967296.0; + RNG_INIT_CHECK(); + return ((rng_64() >> 12) + 0.5) * (1.0/4503599627370496.0); } -// Basically ditto; faster than gsl_rng_uniform_pos() by avoiding indirection. -inline __attribute__((always_inline)) double Eidos_rng_uniform_pos(const gsl_rng *p_r) + +// generates a random unsigned 32-bit integer in the interval [0, p_n - 1] +inline __attribute__((always_inline)) uint32_t Eidos_rng_interval_uint32(EidosRNG_32_bit &rng_32, uint32_t p_n) { - double x; + // The gsl_rng_uniform_int() function is very slow, so this is a customized version that should be faster. + // Basically it is faster because (1) the range of the generator is hard-coded, (2) the range check is done + // only on #if DEBUG, and (3) it calls the generator directly, not through the GSL's pointer; otherwise the + // logic is the same. + uint32_t scale = UINT32_MAX / p_n; + uint32_t k; + +#if DEBUG + if ((p_n > INT32_MAX) || (p_n <= 0)) + { + GSL_ERROR_VAL("invalid n, either 0 or exceeds maximum value of generator", GSL_EINVAL, 0); + } +#endif do { - x = taus_get_inline(p_r->state) / 4294967296.0; + k = rng_32() / scale; } - while (x == 0); + while (k >= p_n); - return x; + return k; } -// The gsl_rng_uniform_int() function is very slow, so this is a customized version that should be faster. 
-// Basically it is faster because (1) the range of the taus2 generator is hard-coded, (2) the range check -// is done only on #if DEBUG, (3) it uses uint32_t, and (4) it calls taus_get() directly; otherwise the -// logic is the same. -inline __attribute__((always_inline)) uint32_t Eidos_rng_uniform_int(gsl_rng *p_r, uint32_t p_n) +// generates a random unsigned 64-bit integer in the interval [0, p_n - 1] +inline __attribute__((always_inline)) uint64_t Eidos_rng_interval_uint64(EidosRNG_64_bit &rng_64, uint64_t p_n) { - uint32_t scale = UINT32_MAX / p_n; - uint32_t k; + // The gsl_rng_uniform_int() function is very slow, so this is a customized version that should be faster. + // Basically it is faster because (1) the range of the generator is hard-coded, (2) the range check is done + // only on #if DEBUG, and (3) it calls the generator directly, not through the GSL's pointer; otherwise the + // logic is the same. + uint64_t scale = UINT64_MAX / p_n; + uint64_t k; #if DEBUG - if ((p_n > INT32_MAX) || (p_n <= 0)) + if ((p_n > INT64_MAX) || (p_n <= 0)) { - GSL_ERROR_VAL ("invalid n, either 0 or exceeds maximum value of generator", GSL_EINVAL, 0) ; + GSL_ERROR_VAL("invalid n, either 0 or exceeds maximum value of generator", GSL_EINVAL, 0); } #endif do { - k = ((uint32_t)(taus_get_inline(p_r->state))) / scale; // taus_get is used by the taus2 RNG + k = rng_64() / scale; } while (k >= p_n); return k; } +// generates a random unsigned 64-bit integer in the interval [0, p_n - 1] using a fast but slightly biased +// algorithm; this should only be used for p_n << UINT32_MAX so that the bias is undetectable +inline __attribute__((always_inline)) uint64_t Eidos_rng_interval_uint64_FAST(EidosRNG_64_bit &rng_64, uint64_t p_n) +{ + // OK, so. 
The GSL's uniform int method, which we replicate in Eidos_rng_interval_uint64(), makes sure + // that the probability of each integer is exactly equal by figuring out a scaling, and then looping on + // generated draws, with that scaling applied, until it gets one that is in range. Here we skip that extra + // work and just use modulo. This technically means our draws will be biased toward the low end, unless + // p_n is an exact divisor of UINT64_MAX, I guess; but UINT64_MAX is so vastly large compared to the uses + // we will put this generator to that the bias should be utterly undetectable. We are not drawing values + // in anywhere near the full range of the generator. BCH 12 May 2018 + return rng_64() % p_n; +} + + // The gsl_ran_shuffle() function leans very heavily on gsl_rng_uniform_int(), which is very slow -// as mentioned above. This is essentially same code as gsl_ran_shuffle(), but calls Eidos_rng_uniform_int(). +// as mentioned above. This is the same code as gsl_ran_shuffle(), but calls Eidos_rng_interval_uint32(). // It is also templated, to take advantage of std::swap(), which is faster than the GSL's generalized // swap() function. This uses the Fisher-Yates algorithm. BCH 12/30/2022: I tried using a different // algorithm called MergeShuffle (https://arxiv.org/abs/1508.03167), which is a parallelizable algorithm @@ -215,11 +269,21 @@ inline __attribute__((always_inline)) uint32_t Eidos_rng_uniform_int(gsl_rng *p_ // a little slower. It looks like their observed speed was due to cache locality (less of a win now, // as caches get ever bigger) and hand-tuned assembly code (not an option for us), and maybe also a // custom fast RNG optimized for generating single random bits. So, Fisher-Yates it is. 
-template inline void Eidos_ran_shuffle(gsl_rng *r, T *base, uint32_t n) +template inline void Eidos_ran_shuffle_uint32(EidosRNG_32_bit &rng_32, T *base, uint32_t n) { for (uint32_t i = n - 1; i > 0; i--) { - uint32_t j = Eidos_rng_uniform_int(r, (uint32_t)(i+1)); + uint32_t j = Eidos_rng_interval_uint32(rng_32, i + 1); + + std::swap(base[i], base[j]); + } +} + +template inline void Eidos_ran_shuffle_uint64(EidosRNG_64_bit &rng_64, T *base, uint64_t n) +{ + for (uint64_t i = n - 1; i > 0; i--) + { + uint64_t j = Eidos_rng_interval_uint64(rng_64, i + 1); std::swap(base[i], base[j]); } @@ -243,18 +307,18 @@ template inline void Eidos_ran_shuffle(gsl_rng *r, T *base, uint32_t n #ifndef USE_GSL_POISSON -static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisson(gsl_rng *r, double p_mu) +inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisson(Eidos_RNG_State *rng_state, double p_mu) { RNG_INIT_CHECK(); // Defer to the GSL for large values of mu; see comments above. if (p_mu > 250) - return gsl_ran_poisson(r, p_mu); + return gsl_ran_poisson(&rng_state->gsl_rng_, p_mu); unsigned int x = 0; double p = exp(-p_mu); double s = p; - double u = Eidos_rng_uniform(r); + double u = Eidos_rng_uniform_doubleCO(rng_state->pcg64_rng_); while (u > s) { @@ -269,13 +333,13 @@ static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisso } // This version allows the caller to supply a precalculated exp(-mu) value -static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisson(gsl_rng *r, double p_mu, double p_exp_neg_mu) +inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisson(Eidos_RNG_State *rng_state, double p_mu, double p_exp_neg_mu) { RNG_INIT_CHECK(); // Defer to the GSL for large values of mu; see comments above. 
if (p_mu > 250) - return gsl_ran_poisson(r, p_mu); + return gsl_ran_poisson(&rng_state->gsl_rng_, p_mu); // Test consistency; normally this is commented out //if (p_exp_neg_mu != exp(-p_mu)) @@ -284,7 +348,7 @@ static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisso unsigned int x = 0; double p = p_exp_neg_mu; double s = p; - double u = Eidos_rng_uniform(r); + double u = Eidos_rng_uniform_doubleCO(rng_state->pcg64_rng_); while (u > s) { @@ -301,7 +365,7 @@ static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisso // This version specifies that the count is guaranteed not to be zero; zero has been ruled out by a previous test // The GSL declares gsl_rng* parameters as const, which seems wrong and confuses cppcheck... // cppcheck-suppress constParameterPointer -static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisson_NONZERO(gsl_rng *r, double p_mu, double p_exp_neg_mu) +inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisson_NONZERO(Eidos_RNG_State *rng_state, double p_mu, double p_exp_neg_mu) { RNG_INIT_CHECK(); @@ -312,7 +376,7 @@ static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisso do { - result = gsl_ran_poisson(r, p_mu); + result = gsl_ran_poisson(&rng_state->gsl_rng_, p_mu); } while (result == 0); @@ -326,7 +390,7 @@ static inline __attribute__((always_inline)) unsigned int Eidos_FastRandomPoisso unsigned int x = 0; double p = p_exp_neg_mu; double s = p; - double u = Eidos_rng_uniform_pos(r); // exclude 0.0 so u != s after rescaling + double u = Eidos_rng_uniform_doubleOO(rng_state->pcg64_rng_); // exclude 0.0 so u != s after rescaling // rescale u so that (u > s) is true in the first round u = u * (1.0 - s) + s; @@ -354,175 +418,66 @@ double Eidos_FastRandomPoisson_PRECALCULATE(double p_mu); // exp(-mu); can under #endif // USE_GSL_POISSON -#pragma mark - -#pragma mark 64-bit MT -#pragma mark - - -// This is a 64-bit Mersenne Twister 
implementation. The code below is used in accordance with its license, reproduced below in full. -// This code, and associated header code, is from: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c -// Thanks to T. Nishimura and M. Matsumoto for this code. I have modified the names of symbols, and added header declarations, and -// changed "unsigned long long" to uint64_t and "long long" to int64_t, and added a little semantic sugar on top to match the GSL in -// the areas we use this, and made some of the code inlined; the core algorithm is of course completely untouched. BCH 12 May 2018. - -/* - A C-program for MT19937-64 (2004/9/29 version). - Coded by Takuji Nishimura and Makoto Matsumoto. - - This is a 64-bit version of Mersenne Twister pseudorandom number - generator. - - Before using, initialize the state by using init_genrand64(seed) - or init_by_array64(init_key, key_length). - - Copyright (C) 2004, Makoto Matsumoto and Takuji Nishimura, - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - References: - T. Nishimura, ``Tables of 64-bit Mersenne Twisters'' - ACM Transactions on Modeling and - Computer Simulation 10. (2000) 348--357. - M. Matsumoto and T. Nishimura, - ``Mersenne Twister: a 623-dimensionally equidistributed - uniform pseudorandom number generator'' - ACM Transactions on Modeling and - Computer Simulation 8. (Jan. 1998) 3--30. - - Any feedback is very welcome. - http://www.math.hiroshima-u.ac.jp/~m-mat/MT/emt.html - email: m-mat @ math.sci.hiroshima-u.ac.jp (remove spaces) - */ - -#define Eidos_MT64_NN 312 -#define Eidos_MT64_MM 156 -#define Eidos_MT64_MATRIX_A 0xB5026F5AA96619E9ULL -#define Eidos_MT64_UM 0xFFFFFFFF80000000ULL /* Most significant 33 bits */ -#define Eidos_MT64_LM 0x7FFFFFFFULL /* Least significant 31 bits */ - -/* initializes mt[NN] with a seed */ -void Eidos_MT64_init_genrand64(Eidos_MT_State *r, uint64_t seed); - -/* initialize by an array with array-length */ -void Eidos_MT64_init_by_array64(Eidos_MT_State *r, const uint64_t init_key[], uint64_t key_length); - -/* BCH: fill the next Eidos_MT64_NN words; used internally by genrand64_int64() */ -void _Eidos_MT64_fill(Eidos_MT_State *r); - -/* generates a random number on [0, 2^64-1]-interval */ -inline __attribute__((always_inline)) uint64_t Eidos_MT64_genrand64_int64(Eidos_MT_State *r) -{ - RNG_INIT_CHECK(); - - /* generate NN words at one time */ - if (r->mti_ >= Eidos_MT64_NN) - _Eidos_MT64_fill(r); - - uint64_t x = r->mt_[r->mti_++]; - - x ^= 
(x >> 29) & 0x5555555555555555ULL; - x ^= (x << 17) & 0x71D67FFFEDA60000ULL; - x ^= (x << 37) & 0xFFF7EEE000000000ULL; - x ^= (x >> 43); - - return x; -} - -/* generates a random number on [0, 2^63-1]-interval */ -inline __attribute__((always_inline)) int64_t Eidos_MT64_genrand64_int63(Eidos_MT_State *r) -{ - return (int64_t)(Eidos_MT64_genrand64_int64(r) >> 1); -} - -/* generates a random number on [0,1]-real-interval */ -inline __attribute__((always_inline)) double Eidos_MT64_genrand64_real1(Eidos_MT_State *r) -{ - return (Eidos_MT64_genrand64_int64(r) >> 11) * (1.0/9007199254740991.0); -} - -/* generates a random number on [0,1)-real-interval */ -inline __attribute__((always_inline)) double Eidos_MT64_genrand64_real2(Eidos_MT_State *r) -{ - return (Eidos_MT64_genrand64_int64(r) >> 11) * (1.0/9007199254740992.0); -} - -/* generates a random number on (0,1)-real-interval */ -inline __attribute__((always_inline)) double Eidos_MT64_genrand64_real3(Eidos_MT_State *r) -{ - return ((Eidos_MT64_genrand64_int64(r) >> 12) + 0.5) * (1.0/4503599627370496.0); -} - -/* BCH: generates a random integer in [0, p_n - 1]; parallel to Eidos_rng_uniform_int() above */ -inline __attribute__((always_inline)) uint64_t Eidos_rng_uniform_int_MT64(Eidos_MT_State *r, uint64_t p_n) -{ - // OK, so. The GSL's uniform int method, whose logic we replicate in Eidos_rng_uniform_int(), makes sure - // that the probability of each integer is exactly equal by figuring out a scaling, and then looping on - // generated draws, with that scaling applied, until it gets one that is in range. Here we skip that extra - // work and just use modulo. This technically means our draws will be biased toward the low end, unless - // p_n is an exact divisor of UINT64_MAX, I guess; but UINT64_MAX is so vastly large compared to the uses - // we will put this generator to that the bias should be utterly undetectable. 
We are not drawing values - // in anywhere near the full range of the generator; we just need a couple of orders of magnitude more - // headroom than UINT32_MAX provides. If we start to use this for a wider range of p_n (such as making it - // available in the Eidos APIs), this decision would need to be revisited. BCH 12 May 2018 - return Eidos_MT64_genrand64_int64(r) % p_n; -} - - #pragma mark - #pragma mark Random coin-flips #pragma mark - // get a random bool from a random number generator -//static inline bool Eidos_RandomBool(gsl_rng *p_r) { return (bool)(taus_get(p_r->state) & 0x01); } +//inline bool Eidos_RandomBool(gsl_rng *p_r) { return (bool)(taus_get(p_r->state) & 0x01); } // optimization of this is possible assuming each bit returned by the RNG is independent and usable as a random boolean. // the independence of all 64 bits seems to be a solid assumption for the MT64 generator, as far as I can tell. -static inline __attribute__((always_inline)) bool Eidos_RandomBool(Eidos_RNG_State *r) +inline __attribute__((always_inline)) bool Eidos_RandomBool(Eidos_RNG_State *rng_state) { RNG_INIT_CHECK(); +#if 0 + // This method gets one uint64_t at a time, and uses all 64 of its bits as random bools. This is + // aesthetically nice, but does have overhead for the branch, and for reading and writing the state. 
bool retval; - if (r->random_bool_bit_counter_ > 0) + if (rng_state->random_bool_bit_counter_ > 0) { - r->random_bool_bit_counter_--; - r->random_bool_bit_buffer_ >>= 1; - retval = r->random_bool_bit_buffer_ & 0x01; + rng_state->random_bool_bit_counter_--; + rng_state->random_bool_bit_buffer_ >>= 1; + retval = rng_state->random_bool_bit_buffer_ & 0x01; } else { - r->random_bool_bit_buffer_ = Eidos_MT64_genrand64_int64(&r->mt_rng_); // MT64 provides 64 independent bits - r->random_bool_bit_counter_ = 63; // 64 good bits originally, and we're about to use one + rng_state->random_bool_bit_buffer_ = rng_state->pcg64_rng_(); // pcg64 provides 64 independent bits + rng_state->random_bool_bit_counter_ = 63; // 64 good bits originally, and we're about to use one - retval = r->random_bool_bit_buffer_ & 0x01; + retval = rng_state->random_bool_bit_buffer_ & 0x01; } return retval; +#elif 1 + // BCH 11/1/2025: This is an optimized version of the version above. Rather than shifting the bit buffer + // down by 1 and writing it out again, it shifts down by the count each time, so it doesn't need to write it. + int bit_counter = rng_state->random_bool_bit_counter_; + + if (bit_counter > 0) + { + rng_state->random_bool_bit_counter_ = --bit_counter; + + return (rng_state->random_bool_bit_buffer_ >> bit_counter) & 0x01; + } + else + { + uint64_t next_uint64 = rng_state->pcg64_rng_(); // pcg64 provides 64 independent bits + + rng_state->random_bool_bit_buffer_ = next_uint64; + rng_state->random_bool_bit_counter_ = 63; // 64 good bits originally, and we're about to use one + + return (next_uint64 >> 63); + } +#else + // This version simply generates a new uint32 each time and uses its lowest-order bit; I was curious to + // see whether, with the pcg32_fast generator, this might actually be faster (no branches), but it isn't. 
+ uint32_t next_uint32 = rng_state->pcg32_rng_(); // pcg32 provides a good low-order bit + + return (next_uint32 & 0x01); +#endif } diff --git a/eidos/eidos_test.cpp b/eidos/eidos_test.cpp index 7d3ae7278..11d458a2a 100644 --- a/eidos/eidos_test.cpp +++ b/eidos/eidos_test.cpp @@ -1286,7 +1286,7 @@ int RunEidosTests(void) { std::cout << std::endl << "SORTING TESTS:" << std::endl; - gsl_rng *rng = EIDOS_GSL_RNG(omp_get_thread_num()); // the single-threaded RNG + gsl_rng *rng_gsl = EIDOS_GSL_RNG(omp_get_thread_num()); // the single-threaded RNG typedef std::string SORT_TYPE; { diff --git a/eidos/eidos_test_functions_statistics.cpp b/eidos/eidos_test_functions_statistics.cpp index 2ddfa22aa..b6c322bee 100644 --- a/eidos/eidos_test_functions_statistics.cpp +++ b/eidos/eidos_test_functions_statistics.cpp @@ -737,8 +737,8 @@ void _RunFunctionDistributionTests(void) // rbeta() EidosAssertScriptSuccess("rbeta(0, 1, 1000);", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess("rbeta(0, float(0), float(0));", gStaticEidosValue_Float_ZeroVec); - EidosAssertScriptSuccess_L("setSeed(0); abs(rbeta(1, 1, 5) - c(0.115981)) < 0.0001;", true); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rbeta(3, 1, 5) - c(0.115981, 0.0763773, 0.05032)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_L("setSeed(0); abs(rbeta(1, 1, 5) - c(0.192527)) < 0.0001;", true); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rbeta(3, 1, 5) - c(0.192527, 0.235423, 0.365635)) < 0.0001;", {true, true, true}); EidosAssertScriptRaise("rbeta(-1, 1, 1000);", 0, "requires n to be"); EidosAssertScriptRaise("rbeta(2, 0, 1);", 0, "requires alpha > 0.0"); EidosAssertScriptRaise("rbeta(2, c(1,0), 1);", 0, "requires alpha > 0.0"); @@ -756,14 +756,14 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess_IV("rbinom(3, 10, 1.0);", {10, 10, 10}); EidosAssertScriptSuccess_IV("rbinom(3, 0, 0.0);", {0, 0, 0}); EidosAssertScriptSuccess_IV("rbinom(3, 0, 1.0);", {0, 0, 0}); - 
EidosAssertScriptSuccess_IV("setSeed(0); rbinom(10, 1, 0.5);", {0, 1, 1, 1, 1, 1, 0, 0, 0, 0}); - EidosAssertScriptSuccess_IV("setSeed(0); rbinom(10, 1, 0.5000001);", {1, 0, 0, 1, 1, 0, 1, 0, 1, 0}); - EidosAssertScriptSuccess_IV("setSeed(0); rbinom(5, 10, 0.5);", {4, 8, 5, 3, 4}); - EidosAssertScriptSuccess_IV("setSeed(1); rbinom(5, 10, 0.5);", {7, 6, 3, 6, 3}); - EidosAssertScriptSuccess_IV("setSeed(2); rbinom(5, 1000, 0.01);", {11, 16, 10, 14, 10}); - EidosAssertScriptSuccess_IV("setSeed(3); rbinom(5, 1000, 0.99);", {992, 990, 995, 991, 995}); - EidosAssertScriptSuccess_IV("setSeed(4); rbinom(3, 100, c(0.1, 0.5, 0.9));", {7, 50, 87}); - EidosAssertScriptSuccess_IV("setSeed(5); rbinom(3, c(10, 30, 50), 0.5);", {6, 12, 26}); + EidosAssertScriptSuccess_IV("setSeed(0); rbinom(10, 1, 0.5);", {0, 0, 0, 0, 1, 1, 0, 1, 0, 1}); + EidosAssertScriptSuccess_IV("setSeed(0); rbinom(10, 1, 0.5000001);", {1, 1, 1, 1, 1, 1, 0, 1, 0, 1}); + EidosAssertScriptSuccess_IV("setSeed(0); rbinom(5, 10, 0.5);", {2, 3, 4, 4, 3}); + EidosAssertScriptSuccess_IV("setSeed(1); rbinom(5, 10, 0.5);", {5, 5, 7, 4, 8}); + EidosAssertScriptSuccess_IV("setSeed(2); rbinom(5, 1000, 0.01);", {8, 8, 11, 7, 6}); + EidosAssertScriptSuccess_IV("setSeed(3); rbinom(5, 1000, 0.99);", {992, 996, 989, 988, 990}); + EidosAssertScriptSuccess_IV("setSeed(4); rbinom(3, 100, c(0.1, 0.5, 0.9));", {6, 59, 89}); + EidosAssertScriptSuccess_IV("setSeed(5); rbinom(3, c(10, 30, 50), 0.5);", {4, 13, 22}); EidosAssertScriptRaise("rbinom(-1, 10, 0.5);", 0, "requires n to be"); EidosAssertScriptRaise("rbinom(3, -1, 0.5);", 0, "requires size >= 0"); EidosAssertScriptRaise("rbinom(3, 10, -0.1);", 0, "in [0.0, 1.0]"); @@ -777,11 +777,11 @@ void _RunFunctionDistributionTests(void) // rcauchy() EidosAssertScriptSuccess("rcauchy(0);", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess("rcauchy(0, float(0), float(0));", gStaticEidosValue_Float_ZeroVec); - EidosAssertScriptSuccess_LV("setSeed(0); (rcauchy(2) - c(0.665522, 
-0.155038)) < 0.00001;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(0); (rcauchy(2, 10.0) - c(10.6655, 9.84496)) < 0.001;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(2); (rcauchy(2, 10.0, 100.0) - c(-255.486, -4.66262)) < 0.001;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(3); (rcauchy(2, c(-10, 10), 100.0) - c(89.8355, 1331.82)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(4); (rcauchy(2, 10.0, c(0.1, 10)) - c(10.05, -4.51227)) < 0.001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rcauchy(2) - c(0.16713, 0.595204)) < 0.00001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rcauchy(2, 10.0) - c(10.1671, 10.5952)) < 0.001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(2); abs(rcauchy(2, 10.0, 100.0) - c(110.349, 122.822)) < 0.001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(3); abs(rcauchy(2, c(-10, 10), 100.0) - c(133.494, 14.8875)) < 0.01;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(4); abs(rcauchy(2, 10.0, c(0.1, 10)) - c(10.0284, 8.47099)) < 0.001;", {true, true}); EidosAssertScriptRaise("rcauchy(-1);", 0, "requires n to be"); EidosAssertScriptRaise("rcauchy(1, 0, 0);", 0, "requires scale > 0.0"); EidosAssertScriptRaise("rcauchy(2, c(0,0), -1);", 0, "requires scale > 0.0"); @@ -798,13 +798,13 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess_IV("rdunif(1, 1, 1);", {1}); EidosAssertScriptSuccess_IV("rdunif(3, 1, 1);", {1, 1, 1}); EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(1), 0);", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10), c(0,1,1,1,1,1,0,0,0,0));", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10, 10, 11), c(10,11,11,11,11,11,10,10,10,10));", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10, 10, 15), c(10, 15, 11, 10, 14, 12, 11, 10, 12, 15));", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10, -10, 15), c(-6, 9, 13, 8, 
-10, -2, 1, -2, 4, -9));", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(5, 1000000, 2000000), c(1834587, 1900900, 1272746, 1916963, 1786506));", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(5, 1000000000, 2000000000), c(1824498419, 1696516320, 1276316141, 1114192161, 1469447550));", true); - EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(5, 10000000000, 20000000000), c(18477398967, 14168180191, 12933243864, 17033840166, 15472500391));", true); // 64-bit range + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10), c(0, 0, 0, 0, 1, 1, 0, 1, 0, 1));", true); + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10, 10, 11), c(10, 10, 10, 10, 11, 11, 10, 11, 10, 11));", true); + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10, 10, 15), c(10, 11, 11, 11, 10, 11, 14, 11, 14, 11));", true); + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(10, -10, 15), c(-9, -6, -4, -2, -8, -3, 9, -2, 10, -2));", true); + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(5, 1000000, 2000000), c(1052712, 1170896, 1269062, 1323101, 1105484));", true); + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(5, 1000000000, 2000000000), c(1052712017, 1170896080, 1269062279, 1323101491, 1105484052));", true); + EidosAssertScriptSuccess_L("setSeed(0); identical(rdunif(5, 10000000000, 20000000000), c(10527120177, 11708960806, 12690622795, 13231014907, 11054840521));", true); // 64-bit range EidosAssertScriptRaise("rdunif(-1);", 0, "requires n to be"); EidosAssertScriptRaise("rdunif(1, 0, -1);", 0, "requires min <= max"); EidosAssertScriptRaise("rdunif(2, 0, c(7, -1));", 0, "requires min <= max"); @@ -828,11 +828,11 @@ void _RunFunctionDistributionTests(void) // rexp() EidosAssertScriptSuccess("rexp(0);", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess("rexp(0, float(0));", gStaticEidosValue_Float_ZeroVec); - EidosAssertScriptSuccess_L("setSeed(0); abs(rexp(1) - c(0.206919)) < 0.00001;", 
true); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rexp(3) - c(0.206919, 3.01675, 0.788416)) < 0.00001;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(1); abs(rexp(3, 10) - c(20.7, 12.2, 0.9)) < 0.1;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(2); abs(rexp(3, 100000) - c(95364.3, 307170.0, 74334.9)) < 0.1;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(3); abs(rexp(3, c(10, 100, 1000)) - c(2.8, 64.6, 58.8)) < 0.1;", {true, true, true}); + EidosAssertScriptSuccess_L("setSeed(0); abs(rexp(1) - c(0.0541521)) < 0.00001;", true); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rexp(3) - c(0.0541521, 0.18741, 0.313427)) < 0.00001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(1); abs(rexp(3, 10) - c(6.36062, 6.28821, 19.0056)) < 0.1;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(2); abs(rexp(3, 100000) - c(28842.1, 31355.3, 102831.0)) < 0.1;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(3); abs(rexp(3, c(10, 100, 1000)) - c(3.65665, 1.5667, 948.946)) < 0.1;", {true, true, true}); EidosAssertScriptRaise("rexp(-1);", 0, "requires n to be"); EidosAssertScriptRaise("rexp(3, c(10, 5));", 0, "requires mu to be"); EidosAssertScriptSuccess("rexp(1, NAN);", gStaticEidosValue_FloatNAN); @@ -856,11 +856,11 @@ void _RunFunctionDistributionTests(void) // rf() EidosAssertScriptSuccess("rf(0, 10, 15);", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess("rf(0, float(0), float(0));", gStaticEidosValue_Float_ZeroVec); - EidosAssertScriptSuccess_L("setSeed(0); abs(rf(1, 2, 3) - c(0.568968)) < 0.0001;", true); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, 2, 3) - c(0.568968, 0.533479, 0.316429)) < 0.0001;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, 2, 4) - c(0.588202, 0.486162, 0.295787)) < 0.0001;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, c(2,2,2), 4) - c(0.588202, 0.486162, 0.295787)) < 0.0001;", {true, true, 
true}); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, 2, c(4,4,4)) - c(0.588202, 0.486162, 0.295787)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_L("setSeed(0); abs(rf(1, 2, 3) - c(1.95702)) < 0.0001;", true); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, 2, 3) - c(1.95702, 2.85093, 2.42374)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, 2, 4) - c(1.64284, 2.29314, 2.52913)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, c(2,2,2), 4) - c(1.64284, 2.29314, 2.52913)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rf(3, 2, c(4,4,4)) - c(1.64284, 2.29314, 2.52913)) < 0.0001;", {true, true, true}); EidosAssertScriptRaise("rf(-1, 10, 15);", 0, "requires n to be"); EidosAssertScriptRaise("rf(2, 0, 15);", 0, "requires d1 > 0.0"); EidosAssertScriptRaise("rf(2, 10, 0);", 0, "requires d2 > 0.0"); @@ -875,11 +875,11 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess("rgamma(0, 0, 1000);", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess("rgamma(0, float(0), float(0));", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess_FV("rgamma(3, 0, 1000);", {0.0, 0.0, 0.0}); - EidosAssertScriptSuccess_L("setSeed(0); abs(rgamma(1, 1, 100) - c(1.02069)) < 0.0001;", true); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, 1, 100) - c(1.02069, 1.0825, 0.951862)) < 0.0001;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, -1, 100) - c(-1.02069, -1.0825, -0.951862)) < 0.0001;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, c(-1,-1,-1), 100) - c(-1.02069, -1.0825, -0.951862)) < 0.0001;", {true, true, true}); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, -1, c(100,100,100)) - c(-1.02069, -1.0825, -0.951862)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_L("setSeed(0); abs(rgamma(1, 1, 100) - c(1.01615)) < 0.0001;", true); + 
EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, 1, 100) - c(1.01615, 0.939423, 1.03091)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, -1, 100) - c(-1.01615, -0.939423, -1.03091)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, c(-1,-1,-1), 100) - c(-1.01615, -0.939423, -1.03091)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rgamma(3, -1, c(100,100,100)) - c(-1.01615, -0.939423, -1.03091)) < 0.0001;", {true, true, true}); EidosAssertScriptRaise("rgamma(-1, 0, 1000);", 0, "requires n to be"); EidosAssertScriptRaise("rgamma(2, 0, 0);", 0, "requires shape > 0.0"); EidosAssertScriptRaise("rgamma(2, c(0,0), 0);", 0, "requires shape > 0.0"); @@ -892,11 +892,11 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess("rgeom(0, 1.0);", gStaticEidosValue_Integer_ZeroVec); EidosAssertScriptSuccess_IV("rgeom(1, 1.0);", {0}); EidosAssertScriptSuccess_IV("rgeom(5, 1.0);", {0, 0, 0, 0, 0}); - EidosAssertScriptSuccess_IV("setSeed(1); rgeom(5, 0.2);", {0, 1, 10, 1, 10}); - EidosAssertScriptSuccess_IV("setSeed(1); rgeom(5, 0.4);", {0, 0, 4, 0, 4}); - EidosAssertScriptSuccess_IV("setSeed(5); rgeom(5, 0.01);", {31, 31, 299, 129, 58}); - EidosAssertScriptSuccess_IV("setSeed(2); rgeom(1, 0.0001);", {4866}); - EidosAssertScriptSuccess_IV("setSeed(3); rgeom(6, c(1, 0.1, 0.01, 0.001, 0.0001, 0.00001));", {0, 13, 73, 2860, 8316, 282489}); + EidosAssertScriptSuccess_IV("setSeed(1); rgeom(5, 0.2);", {3, 3, 0, 6, 0}); + EidosAssertScriptSuccess_IV("setSeed(1); rgeom(5, 0.4);", {1, 1, 0, 2, 0}); + EidosAssertScriptSuccess_IV("setSeed(5); rgeom(5, 0.01);", {140, 18, 201, 107, 368}); + EidosAssertScriptSuccess_IV("setSeed(2); rgeom(1, 0.0001);", {13840}); + EidosAssertScriptSuccess_IV("setSeed(3); rgeom(6, c(1, 0.1, 0.01, 0.001, 0.0001, 0.00001));", {0, 11, 414, 489, 3479, 62929}); EidosAssertScriptRaise("rgeom(-1, 1.0);", 0, "requires n to be"); 
EidosAssertScriptRaise("rgeom(0, 0.0);", 0, "requires 0.0 < p <= 1.0"); EidosAssertScriptRaise("rgeom(0, 1.1);", 0, "requires 0.0 < p <= 1.0"); @@ -939,14 +939,14 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess_IV("rnbinom(3, 10.0, 1.0);", {0, 0, 0}); EidosAssertScriptRaise("rnbinom(3, 0, 0.0);", 0, "probability in (0.0, 1.0]"); EidosAssertScriptSuccess_IV("rnbinom(3, 0, 1.0);", {0, 0, 0}); - EidosAssertScriptSuccess_IV("setSeed(0); rnbinom(10, 1, 0.5);", {1, 0, 0, 0, 0, 1, 0, 0, 2, 2}); - EidosAssertScriptSuccess_IV("setSeed(0); rnbinom(10, 1, 0.5000001);", {1, 0, 0, 0, 0, 1, 0, 0, 2, 2}); - EidosAssertScriptSuccess_IV("setSeed(0); rnbinom(5, 10, 0.5);", {6, 13, 1, 6, 5}); - EidosAssertScriptSuccess_IV("setSeed(1); rnbinom(5, 10, 0.5);", {2, 6, 9, 10, 7}); - EidosAssertScriptSuccess_IV("setSeed(2); rnbinom(5, 1000, 0.01);", {103776, 97182, 94313, 95927, 92216}); - EidosAssertScriptSuccess_IV("setSeed(3); rnbinom(5, 1000, 0.99);", {6, 6, 8, 5, 15}); - EidosAssertScriptSuccess_IV("setSeed(4); rnbinom(3, 100, c(0.1, 0.5, 0.9));", {842, 125, 11}); - EidosAssertScriptSuccess_IV("setSeed(5); rnbinom(3, c(10, 30, 50), 0.5);", {16, 26, 45}); + EidosAssertScriptSuccess_IV("setSeed(0); rnbinom(10, 1, 0.5);", {0, 0, 0, 0, 0, 0, 0, 0, 1, 0}); + EidosAssertScriptSuccess_IV("setSeed(0); rnbinom(10, 1, 0.5000001);", {0, 0, 0, 0, 0, 0, 0, 0, 1, 0}); + EidosAssertScriptSuccess_IV("setSeed(0); rnbinom(5, 10, 0.5);", {9, 6, 4, 8, 8}); + EidosAssertScriptSuccess_IV("setSeed(1); rnbinom(5, 10, 0.5);", {15, 12, 6, 9, 2}); + EidosAssertScriptSuccess_IV("setSeed(2); rnbinom(5, 1000, 0.01);", {95823, 100000, 100280, 104485, 99476}); + EidosAssertScriptSuccess_IV("setSeed(3); rnbinom(5, 1000, 0.99);", {10, 7, 18, 2, 8}); + EidosAssertScriptSuccess_IV("setSeed(4); rnbinom(3, 100, c(0.1, 0.5, 0.9));", {933, 75, 13}); + EidosAssertScriptSuccess_IV("setSeed(5); rnbinom(3, c(10, 30, 50), 0.5);", {5, 34, 50}); EidosAssertScriptRaise("rnbinom(-1, 10, 0.5);", 0, "requires n to 
be"); EidosAssertScriptRaise("rnbinom(3, -1, 0.5);", 0, "requires size >= 0"); EidosAssertScriptRaise("rnbinom(3, 10, -0.1);", 0, "in (0.0, 1.0]"); @@ -965,11 +965,11 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess_FV("rnorm(3, 0, 0);", {0.0, 0.0, 0.0}); EidosAssertScriptSuccess_FV("rnorm(1, 1, 0);", {1.0}); EidosAssertScriptSuccess_FV("rnorm(3, 1, 0);", {1.0, 1.0, 1.0}); - EidosAssertScriptSuccess_LV("setSeed(0); (rnorm(2) - c(-0.785386, 0.132009)) < 0.000001;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(1); (rnorm(2, 10.0) - c(10.38, 10.26)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(2); (rnorm(2, 10.0, 100.0) - c(59.92, 95.35)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(3); (rnorm(2, c(-10, 10), 100.0) - c(59.92, 95.35)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(4); (rnorm(2, 10.0, c(0.1, 10)) - c(59.92, 95.35)) < 0.01;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rnorm(2) - c(-0.895053, -0.315753)) < 0.000001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(1); abs(rnorm(2, 10.0) - c(7.6679, 9.5468)) < 0.01;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(2); abs(rnorm(2, 10.0, 100.0) - c(-74.4017, -107.421)) < 0.01;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(3); abs(rnorm(2, c(-10, 10), 100.0) - c(142.441, 121.46)) < 0.01;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(4); abs(rnorm(2, 10.0, c(0.1, 10)) - c(10.0681, 12.2818)) < 0.01;", {true, true}); EidosAssertScriptRaise("rnorm(-1);", 0, "requires n to be"); EidosAssertScriptRaise("rnorm(1, 0, -1);", 0, "requires sd >= 0.0"); EidosAssertScriptRaise("rnorm(2, c(0,0), -1);", 0, "requires sd >= 0.0"); @@ -982,11 +982,11 @@ void _RunFunctionDistributionTests(void) // rpois() EidosAssertScriptSuccess("rpois(0, 1.0);", gStaticEidosValue_Integer_ZeroVec); - EidosAssertScriptSuccess_IV("setSeed(0); rpois(5, 1.0);", {0, 2, 0, 1, 1}); - 
EidosAssertScriptSuccess_IV("setSeed(1); rpois(5, 0.2);", {1, 0, 0, 0, 0}); - EidosAssertScriptSuccess_IV("setSeed(2); rpois(5, 10000);", {10205, 10177, 10094, 10227, 9875}); - EidosAssertScriptSuccess_IV("setSeed(2); rpois(1, 10000);", {10205}); - EidosAssertScriptSuccess_IV("setSeed(3); rpois(5, c(1, 10, 100, 1000, 10000));", {0, 8, 97, 994, 9911}); + EidosAssertScriptSuccess_IV("setSeed(0); rpois(10, 1.0);", {0, 0, 0, 0, 0, 0, 1, 1, 1, 0}); + EidosAssertScriptSuccess_IV("setSeed(1); rpois(5, 0.2);", {0, 0, 1, 1, 0}); + EidosAssertScriptSuccess_IV("setSeed(2); rpois(5, 10000);", {9854, 10025, 9953, 10049, 9917}); + EidosAssertScriptSuccess_IV("setSeed(2); rpois(1, 10000);", {9854}); + EidosAssertScriptSuccess_IV("setSeed(3); rpois(5, c(1, 10, 100, 1000, 10000));", {0, 8, 109, 1014, 10020}); EidosAssertScriptRaise("rpois(-1, 1.0);", 0, "requires n to be"); EidosAssertScriptRaise("rpois(0, 0.0);", 0, "requires lambda > 0.0"); EidosAssertScriptRaise("rpois(0, NAN);", 0, "requires lambda > 0.0"); @@ -1001,12 +1001,12 @@ void _RunFunctionDistributionTests(void) EidosAssertScriptSuccess_FV("runif(3, 0, 0);", {0.0, 0.0, 0.0}); EidosAssertScriptSuccess_FV("runif(1, 1, 1);", {1.0}); EidosAssertScriptSuccess_FV("runif(3, 1, 1);", {1.0, 1.0, 1.0}); - EidosAssertScriptSuccess_L("setSeed(0); abs(runif(1) - c(0.186915)) < 0.000001;", true); - EidosAssertScriptSuccess_LV("setSeed(0); abs(runif(2) - c(0.186915, 0.951040)) < 0.000001;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(1); abs(runif(2, 0.5) - c(0.93, 0.85)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(2); abs(runif(2, 10.0, 100.0) - c(65.31, 95.82)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(3); abs(runif(2, c(-100, 1), 10.0) - c(-72.52, 5.28)) < 0.01;", {true, true}); - EidosAssertScriptSuccess_LV("setSeed(4); abs(runif(2, -10.0, c(1, 1000)) - c(-8.37, 688.97)) < 0.01;", {true, true}); + EidosAssertScriptSuccess_L("setSeed(0); abs(runif(1) - c(0.052712)) < 0.000001;", 
true); + EidosAssertScriptSuccess_LV("setSeed(0); abs(runif(2) - c(0.052712, 0.170896)) < 0.000001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(1); abs(runif(2, 0.5) - c(0.735314, 0.73339)) < 0.0001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(2); abs(runif(2, 10.0, 100.0) - c(32.5498, 34.2239)) < 0.0001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(3); abs(runif(2, c(-100, 1), 10.0) - c(-66.3109, 1.1399)) < 0.0001;", {true, true}); + EidosAssertScriptSuccess_LV("setSeed(4); abs(runif(2, -10.0, c(1, 1000)) - c(-9.03193, 951.221)) < 0.001;", {true, true}); EidosAssertScriptRaise("runif(-1);", 0, "requires n to be"); EidosAssertScriptRaise("runif(1, 0, -1);", 0, "requires min < max"); EidosAssertScriptRaise("runif(2, 0, c(7,-1));", 0, "requires min < max"); @@ -1018,8 +1018,8 @@ void _RunFunctionDistributionTests(void) // rweibull() EidosAssertScriptSuccess("rweibull(0, 1, 1);", gStaticEidosValue_Float_ZeroVec); EidosAssertScriptSuccess("rweibull(0, float(0), float(0));", gStaticEidosValue_Float_ZeroVec); - EidosAssertScriptSuccess_L("setSeed(0); abs(rweibull(1, 1, 1) - c(1.6771)) < 0.0001;", true); - EidosAssertScriptSuccess_LV("setSeed(0); abs(rweibull(3, 1, 1) - c(1.6771, 0.0501994, 0.60617)) < 0.0001;", {true, true, true}); + EidosAssertScriptSuccess_L("setSeed(0); abs(rweibull(1, 1, 1) - c(2.94291)) < 0.0001;", true); + EidosAssertScriptSuccess_LV("setSeed(0); abs(rweibull(3, 1, 1) - c(2.94291, 1.7667, 1.31281)) < 0.0001;", {true, true, true}); EidosAssertScriptRaise("rweibull(1, 0, 1);", 0, "requires lambda > 0.0"); EidosAssertScriptRaise("rweibull(1, 1, 0);", 0, "requires k > 0.0"); EidosAssertScriptRaise("rweibull(3, c(1,1,0), 1);", 0, "requires lambda > 0.0"); @@ -1032,11 +1032,11 @@ void _RunFunctionDistributionTests(void) // rztpois() EidosAssertScriptSuccess("rztpois(0, 1.0);", gStaticEidosValue_Integer_ZeroVec); - EidosAssertScriptSuccess_IV("setSeed(0); rztpois(5, 1.0);", {1, 3, 1, 1, 1}); - 
EidosAssertScriptSuccess_IV("setSeed(1); rztpois(5, 0.2);", {1, 1, 1, 1, 1}); - EidosAssertScriptSuccess_IV("setSeed(2); rztpois(5, 10000);", {10205, 10177, 10094, 10227, 9875}); - EidosAssertScriptSuccess_IV("setSeed(2); rztpois(1, 10000);", {10205}); - EidosAssertScriptSuccess_IV("setSeed(3); rztpois(5, c(1, 10, 100, 1000, 10000));", {1, 10, 84, 1037, 9946}); + EidosAssertScriptSuccess_IV("setSeed(0); rztpois(10, 1.0);", {1, 1, 1, 1, 1, 1, 2, 1, 2, 1}); + EidosAssertScriptSuccess_IV("setSeed(1); rztpois(5, 0.2);", {1, 1, 1, 1, 2}); + EidosAssertScriptSuccess_IV("setSeed(2); rztpois(5, 10000);", {9854, 10025, 9953, 10049, 9917}); + EidosAssertScriptSuccess_IV("setSeed(2); rztpois(1, 10000);", {9854}); + EidosAssertScriptSuccess_IV("setSeed(3); rztpois(5, c(1, 10, 100, 1000, 10000));", {1, 4, 103, 1011, 10091}); EidosAssertScriptRaise("rztpois(-1, 1.0);", 0, "requires n to be"); EidosAssertScriptRaise("rztpois(0, 0.0);", 0, "requires lambda > 0.0"); EidosAssertScriptRaise("rztpois(0, NAN);", 0, "requires lambda > 0.0"); diff --git a/eidos/eidos_test_functions_vector.cpp b/eidos/eidos_test_functions_vector.cpp index 9cbfa5a7e..be4aeaaf0 100644 --- a/eidos/eidos_test_functions_vector.cpp +++ b/eidos/eidos_test_functions_vector.cpp @@ -229,16 +229,16 @@ void _RunFunctionVectorConstructionTests_s_through_z(void) EidosAssertScriptRaise("sample(5, 2, T, NAN);", 0, "requires all weights to be"); EidosAssertScriptRaise("sample(1:5, 2, T, c(1,2,NAN,4,5));", 0, "requires all weights to be"); EidosAssertScriptRaise("sample(5, 2, F);", 0, "insufficient elements"); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T);", {1, 5, 3, 1, 2}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, F);", {3, 5, 2, 4, 1}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 6, T);", {1, 5, 3, 1, 2, 3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T);", {1, 5, 2, 1, 3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, F);", {2, 3, 5, 4, 1}); + 
EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 6, T);", {1, 5, 2, 1, 3, 3}); EidosAssertScriptRaise("setSeed(0); sample(1:5, 6, F);", 12, "insufficient elements"); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (1:5)*(1:5)*(1:5));", {4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (1.0:5.0)^3);", {4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, F, (1:5)*(1:5)*(1:5));", {4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, F, (1.0:5.0)^3);", {4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (0:4)*(0:4)*(0:4));", {4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (0.0:4.0)^3);", {4}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (1:5)*(1:5)*(1:5));", {3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (1.0:5.0)^3);", {3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, F, (1:5)*(1:5)*(1:5));", {3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, F, (1.0:5.0)^3);", {3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (0:4)*(0:4)*(0:4));", {3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, (0.0:4.0)^3);", {3}); EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, c(0,0,1,0,0));", {3}); EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, T, c(0,0,1.0,0,0));", {3}); EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 1, F, c(0,0,1,0,0));", {3}); @@ -251,14 +251,15 @@ void _RunFunctionVectorConstructionTests_s_through_z(void) EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 2, T, c(1.0,0,100.0,0,0)));", {6}); EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 2, F, c(1,0,100,0,0)));", {4}); EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 2, F, c(1.0,0,100.0,0,0)));", {4}); - EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 100, T, c(1,0,100,0,0)));", {298}); - EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 100, T, c(1.0,0,100.0,0,0)));", {298}); 
- EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (1:5)*(1:5)*(1:5));", {4, 5, 5, 3, 4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (1.0:5.0)^3);", {4, 5, 5, 3, 4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, F, (1:5)*(1:5)*(1:5));", {4, 5, 3, 1, 2}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, F, (1.0:5.0)^3);", {4, 5, 3, 1, 2}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (0:4)*(0:4)*(0:4));", {4, 5, 5, 3, 4}); - EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (0.0:4.0)^3);", {4, 5, 5, 3, 4}); + EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 100, T, c(1,0,100,0,0)));", {292}); + EidosAssertScriptSuccess_IV("setSeed(0); sum(sample(1:5, 100, T, c(1.0,0,100.0,0,0)));", {292}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (1:5)*(1:5)*(1:5));", {3, 4, 4, 4, 3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (1.0:5.0)^3);", {3, 4, 4, 4, 3}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, F, (1:5)*(1:5)*(1:5));", {3, 4, 5, 2, 1}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, F, (1.0:5.0)^3);", {3, 4, 5, 2, 1}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (0:4)*(0:4)*(0:4));", {3, 4, 4, 4, 4}); + EidosAssertScriptSuccess_IV("setSeed(0); sample(1:5, 5, T, (0.0:4.0)^3);", {3, 4, 4, 4, 4}); + EidosAssertScriptSuccess_IV("setSeed(0); tabulate(sample(1:5, 10000, T, (0:4)*(0:4)*(0:4)));", {0, 0, 93, 820, 2714, 6373}); EidosAssertScriptRaise("setSeed(1); sample(1:3, 3, F, c(2.0, 3.0, NAN));", 12, "requires all weights to be"); EidosAssertScriptRaise("setSeed(1); sample(1:5, 5, F, (0:4)^3);", 12, "weights summing to"); EidosAssertScriptRaise("setSeed(1); sample(1:5, 5, F, asInteger((0:4)^3));", 12, "weights summing to"); diff --git a/eidos/pcg_extras.hpp b/eidos/pcg_extras.hpp new file mode 100644 index 000000000..041724a91 --- /dev/null +++ b/eidos/pcg_extras.hpp @@ -0,0 +1,666 @@ +/* + * PCG Random Number 
Generation for C++ + * + * Copyright 2014-2017 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + +/* + * This file provides support code that is useful for random-number generation + * but not specific to the PCG generation scheme, including: + * - 128-bit int support for platforms where it isn't available natively + * - bit twiddling operations + * - I/O of 128-bit and 8-bit integers + * - Handling the evilness of SeedSeq + * - Support for efficiently producing random numbers less than a given + * bound + */ + +#ifndef PCG_EXTRAS_HPP_INCLUDED +#define PCG_EXTRAS_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __GNUC__ + #include +#endif + +/* + * Abstractions for compiler-specific directives + */ + +#ifdef __GNUC__ + #define PCG_NOINLINE __attribute__((noinline)) +#else + #define PCG_NOINLINE +#endif + +/* + * Some members of the PCG library use 128-bit math. When compiling on 64-bit + * platforms, both GCC and Clang provide 128-bit integer types that are ideal + * for the job. + * + * On 32-bit platforms (or with other compilers), we fall back to a C++ + * class that provides 128-bit unsigned integers instead. 
It may seem + * like we're reinventing the wheel here, because libraries already exist + * that support large integers, but most existing libraries provide a very + * generic multiprecision code, but here we're operating at a fixed size. + * Also, most other libraries are fairly heavyweight. So we use a direct + * implementation. Sadly, it's much slower than hand-coded assembly or + * direct CPU support. + * + */ +#if __SIZEOF_INT128__ && !PCG_FORCE_EMULATED_128BIT_MATH + namespace pcg_extras { + typedef __uint128_t pcg128_t; + } + #define PCG_128BIT_CONSTANT(high,low) \ + ((pcg_extras::pcg128_t(high) << 64) + low) +#else + #include "pcg_uint128.hpp" + namespace pcg_extras { + typedef pcg_extras::uint_x4 pcg128_t; + } + #define PCG_128BIT_CONSTANT(high,low) \ + pcg_extras::pcg128_t(high,low) + #define PCG_EMULATED_128BIT_MATH 1 +#endif + + +namespace pcg_extras { + +/* + * We often need to represent a "number of bits". When used normally, these + * numbers are never greater than 128, so an unsigned char is plenty. + * If you're using a nonstandard generator of a larger size, you can set + * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers + * might produce faster code if you set it to an unsigned int.) + */ + +#ifndef PCG_BITCOUNT_T + typedef uint8_t bitcount_t; +#else + typedef PCG_BITCOUNT_T bitcount_t; +#endif + +/* + * C++ requires us to be able to serialize RNG state by printing or reading + * it from a stream. Because we use 128-bit ints, we also need to be able + * ot print them, so here is code to do so. + * + * This code provides enough functionality to print 128-bit ints in decimal + * and zero-padded in hex. It's not a full-featured implementation. 
+ */ + +template +std::basic_ostream& +operator<<(std::basic_ostream& out, pcg128_t value) +{ + auto desired_base = out.flags() & out.basefield; + bool want_hex = desired_base == out.hex; + + if (want_hex) { + uint64_t highpart = uint64_t(value >> 64); + uint64_t lowpart = uint64_t(value); + auto desired_width = out.width(); + if (desired_width > 16) { + out.width(desired_width - 16); + } + if (highpart != 0 || desired_width > 16) + out << highpart; + CharT oldfill = '\0'; + if (highpart != 0) { + out.width(16); + oldfill = out.fill('0'); + } + auto oldflags = out.setf(decltype(desired_base){}, out.showbase); + out << lowpart; + out.setf(oldflags); + if (highpart != 0) { + out.fill(oldfill); + } + return out; + } + constexpr size_t MAX_CHARS_128BIT = 40; + + char buffer[MAX_CHARS_128BIT]; + char* pos = buffer+sizeof(buffer); + *(--pos) = '\0'; + constexpr auto BASE = pcg128_t(10ULL); + do { + auto div = value / BASE; + auto mod = uint32_t(value - (div * BASE)); + *(--pos) = '0' + char(mod); + value = div; + } while(value != pcg128_t(0ULL)); + return out << pos; +} + +template +std::basic_istream& +operator>>(std::basic_istream& in, pcg128_t& value) +{ + typename std::basic_istream::sentry s(in); + + if (!s) + return in; + + constexpr auto BASE = pcg128_t(10ULL); + pcg128_t current(0ULL); + bool did_nothing = true; + bool overflow = false; + for(;;) { + CharT wide_ch = in.get(); + if (!in.good()) { + in.clear(std::ios::eofbit); + break; + } + auto ch = in.narrow(wide_ch, '\0'); + if (ch < '0' || ch > '9') { + in.unget(); + break; + } + did_nothing = false; + pcg128_t digit(uint32_t(ch - '0')); + pcg128_t timesbase = current*BASE; + overflow = overflow || timesbase < current; + current = timesbase + digit; + overflow = overflow || current < digit; + } + + if (did_nothing || overflow) { + in.setstate(std::ios::failbit); + if (overflow) + current = ~pcg128_t(0ULL); + } + + value = current; + + return in; +} + +/* + * Likewise, if people use tiny rngs, we'll be 
serializing uint8_t. + * If we just used the provided IO operators, they'd read/write chars, + * not ints, so we need to define our own. We *can* redefine this operator + * here because we're in our own namespace. + */ + +template +std::basic_ostream& +operator<<(std::basic_ostream&out, uint8_t value) +{ + return out << uint32_t(value); +} + +template +std::basic_istream& +operator>>(std::basic_istream& in, uint8_t& target) +{ + uint32_t value = 0xdecea5edU; + in >> value; + if (!in && value == 0xdecea5edU) + return in; + if (value > uint8_t(~0)) { + in.setstate(std::ios::failbit); + value = ~0U; + } + target = uint8_t(value); + return in; +} + +/* Unfortunately, the above functions don't get found in preference to the + * built in ones, so we create some more specific overloads that will. + * Ugh. + */ + +inline std::ostream& operator<<(std::ostream& out, uint8_t value) +{ + return pcg_extras::operator<< (out, value); +} + +inline std::istream& operator>>(std::istream& in, uint8_t& value) +{ + return pcg_extras::operator>> (in, value); +} + + + +/* + * Useful bitwise operations. + */ + +/* + * XorShifts are invertable, but they are someting of a pain to invert. + * This function backs them out. It's used by the whacky "inside out" + * generator defined later. + */ + +template +inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) +{ + if (2*shift >= bits) { + return x ^ (x >> shift); + } + itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; + itype highmask1 = ~lowmask1; + itype top1 = x; + itype bottom1 = x & lowmask1; + top1 ^= top1 >> shift; + top1 &= highmask1; + x = top1 | bottom1; + itype lowmask2 = (itype(1U) << (bits - shift)) - 1; + itype bottom2 = x & lowmask2; + bottom2 = unxorshift(bottom2, bits - shift, shift); + bottom2 &= lowmask1; + return top1 | bottom2; +} + +/* + * Rotate left and right. + * + * In ideal world, compilers would spot idiomatic rotate code and convert it + * to a rotate instruction. 
Of course, opinions vary on what the correct + * idiom is and how to spot it. For clang, sometimes it generates better + * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. + */ + +template +inline itype rotl(itype value, bitcount_t rot) +{ + constexpr bitcount_t bits = sizeof(itype) * 8; + constexpr bitcount_t mask = bits - 1; +#if PCG_USE_ZEROCHECK_ROTATE_IDIOM + return rot ? (value << rot) | (value >> (bits - rot)) : value; +#else + return (value << rot) | (value >> ((- rot) & mask)); +#endif +} + +template +inline itype rotr(itype value, bitcount_t rot) +{ + constexpr bitcount_t bits = sizeof(itype) * 8; + constexpr bitcount_t mask = bits - 1; +#if PCG_USE_ZEROCHECK_ROTATE_IDIOM + return rot ? (value >> rot) | (value << (bits - rot)) : value; +#else + return (value >> rot) | (value << ((- rot) & mask)); +#endif +} + +/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes + * to properly recognizing idiomatic rotate code, so for we also provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + * (I hope that these compilers get better so that this code can die.) + * + * These overloads will be preferred over the general template code above. 
+ */ +#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) + +inline uint8_t rotr(uint8_t value, bitcount_t rot) +{ + asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} + +inline uint16_t rotr(uint16_t value, bitcount_t rot) +{ + asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} + +inline uint32_t rotr(uint32_t value, bitcount_t rot) +{ + asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} + +#if __x86_64__ +inline uint64_t rotr(uint64_t value, bitcount_t rot) +{ + asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +} +#endif // __x86_64__ + +#elif defined(_MSC_VER) + // Use MSVC++ bit rotation intrinsics + +#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16) + +inline uint8_t rotr(uint8_t value, bitcount_t rot) +{ + return _rotr8(value, rot); +} + +inline uint16_t rotr(uint16_t value, bitcount_t rot) +{ + return _rotr16(value, rot); +} + +inline uint32_t rotr(uint32_t value, bitcount_t rot) +{ + return _rotr(value, rot); +} + +inline uint64_t rotr(uint64_t value, bitcount_t rot) +{ + return _rotr64(value, rot); +} + +#endif // PCG_USE_INLINE_ASM + + +/* + * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of + * 32-bit integers with seed data, but sometimes we want to produce + * larger or smaller integers. + * + * The following code handles this annoyance. + * + * uneven_copy will copy an array of 32-bit ints to an array of larger or + * smaller ints (actually, the code is general it only needing forward + * iterators). The copy is identical to the one that would be performed if + * we just did memcpy on a standard little-endian machine, but works + * regardless of the endian of the machine (or the weirdness of the ints + * involved). + * + * generate_to initializes an array of integers using a SeedSeq + * object. It is given the size as a static constant at compile time and + * tries to avoid memory allocation. 
If we're filling in 32-bit constants + * we just do it directly. If we need a separate buffer and it's small, + * we allocate it on the stack. Otherwise, we fall back to heap allocation. + * Ugh. + * + * generate_one produces a single value of some integral type using a + * SeedSeq object. + */ + + /* uneven_copy helper, case where destination ints are less than 32 bit. */ + +template +SrcIter uneven_copy_impl( + SrcIter src_first, DestIter dest_first, DestIter dest_last, + std::true_type) +{ + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr bitcount_t SRC_SIZE = sizeof(src_t); + constexpr bitcount_t DEST_SIZE = sizeof(dest_t); + constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; + constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; + + size_t count = 0; + src_t value = 0; + + while (dest_first != dest_last) { + if ((count++ % SCALE) == 0) + value = *src_first++; // Get more bits + else + value >>= DEST_BITS; // Move down bits + + *dest_first++ = dest_t(value); // Truncates, ignores high bits. + } + return src_first; +} + + /* uneven_copy helper, case where destination ints are more than 32 bit. */ + +template +SrcIter uneven_copy_impl( + SrcIter src_first, DestIter dest_first, DestIter dest_last, + std::false_type) +{ + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr auto SRC_SIZE = sizeof(src_t); + constexpr auto SRC_BITS = SRC_SIZE * 8; + constexpr auto DEST_SIZE = sizeof(dest_t); + constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE; + + while (dest_first != dest_last) { + dest_t value(0UL); + unsigned int shift = 0; + + for (size_t i = 0; i < SCALE; ++i) { + value |= dest_t(*src_first++) << shift; + shift += SRC_BITS; + } + + *dest_first++ = value; + } + return src_first; +} + +/* uneven_copy, call the right code for larger vs. 
smaller */ + +template +inline SrcIter uneven_copy(SrcIter src_first, + DestIter dest_first, DestIter dest_last) +{ + typedef typename std::iterator_traits::value_type src_t; + typedef typename std::iterator_traits::value_type dest_t; + + constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); + + return uneven_copy_impl(src_first, dest_first, dest_last, + std::integral_constant{}); +} + +/* generate_to, fill in a fixed-size array of integral type using a SeedSeq + * (actually works for any random-access iterator) + */ + +template +inline void generate_to_impl(SeedSeq&& generator, DestIter dest, + std::true_type) +{ + generator.generate(dest, dest+size); +} + +template +void generate_to_impl(SeedSeq&& generator, DestIter dest, + std::false_type) +{ + typedef typename std::iterator_traits::value_type dest_t; + constexpr auto DEST_SIZE = sizeof(dest_t); + constexpr auto GEN_SIZE = sizeof(uint32_t); + + constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; + constexpr size_t FROM_ELEMS = + GEN_IS_SMALLER + ? 
size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) + : (size + (GEN_SIZE / DEST_SIZE) - 1) + / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); + // this odd code ^^^^^^^^^^^^^^^^^ is work-around for + // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 + + if (FROM_ELEMS <= 1024) { + uint32_t buffer[FROM_ELEMS]; + generator.generate(buffer, buffer+FROM_ELEMS); + uneven_copy(buffer, dest, dest+size); + } else { + uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); + generator.generate(buffer, buffer+FROM_ELEMS); + uneven_copy(buffer, dest, dest+size); + free(static_cast(buffer)); + } +} + +template +inline void generate_to(SeedSeq&& generator, DestIter dest) +{ + typedef typename std::iterator_traits::value_type dest_t; + constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); + + generate_to_impl(std::forward(generator), dest, + std::integral_constant{}); +} + +/* generate_one, produce a value of integral type using a SeedSeq + * (optionally, we can have it produce more than one and pick which one + * we want) + */ + +template +inline UInt generate_one(SeedSeq&& generator) +{ + UInt result[N]; + generate_to(std::forward(generator), result); + return result[i]; +} + +template +auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) + -> typename RngType::result_type +{ + typedef typename RngType::result_type rtype; + rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) + % upper_bound; + for (;;) { + rtype r = rng() - RngType::min(); + if (r >= threshold) + return r % upper_bound; + } +} + +template +void shuffle(Iter from, Iter to, RandType&& rng) +{ + typedef typename std::iterator_traits::difference_type delta_t; + typedef typename std::remove_reference::type::result_type result_t; + auto count = to - from; + while (count > 1) { + delta_t chosen = delta_t(bounded_rand(rng, result_t(count))); + --count; + --to; + using std::swap; + swap(*(from + chosen), *to); + } +} + +/* + * Although std::seed_seq is useful, it 
isn't everything. Often we want to + * initialize a random-number generator some other way, such as from a random + * device. + * + * Technically, it does not meet the requirements of a SeedSequence because + * it lacks some of the rarely-used member functions (some of which would + * be impossible to provide). However the C++ standard is quite specific + * that actual engines only called the generate method, so it ought not to be + * a problem in practice. + */ + +template +class seed_seq_from { +private: + RngType rng_; + + typedef uint_least32_t result_type; + +public: + template + seed_seq_from(Args&&... args) : + rng_(std::forward(args)...) + { + // Nothing (else) to do... + } + + template + void generate(Iter start, Iter finish) + { + for (auto i = start; i != finish; ++i) + *i = result_type(rng_()); + } + + constexpr size_t size() const + { + return (sizeof(typename RngType::result_type) > sizeof(result_type) + && RngType::max() > ~size_t(0UL)) + ? ~size_t(0UL) + : size_t(RngType::max()); + } +}; + +/* + * Sometimes you might want a distinct seed based on when the program + * was compiled. That way, a particular instance of the program will + * behave the same way, but when recompiled it'll produce a different + * value. + */ + +template +struct static_arbitrary_seed { +private: + static constexpr IntType fnv(IntType hash, const char* pos) { + return *pos == '\0' + ? hash + : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); + } + +public: + static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), + __DATE__ __TIME__ __FILE__); +}; + +// Sometimes, when debugging or testing, it's handy to be able print the name +// of a (in human-readable form). 
This code allows the idiom: +// +// cout << printable_typename() +// +// to print out my_foo_type_t (or its concrete type if it is a synonym) + +#if __cpp_rtti || __GXX_RTTI + +template +struct printable_typename {}; + +template +std::ostream& operator<<(std::ostream& out, printable_typename) { + const char *implementation_typename = typeid(T).name(); +#ifdef __GNUC__ + int status; + char* pretty_name = + abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status); + if (status == 0) + out << pretty_name; + free(static_cast(pretty_name)); + if (status == 0) + return out; +#endif + out << implementation_typename; + return out; +} + +#endif // __cpp_rtti || __GXX_RTTI + +} // namespace pcg_extras + +#endif // PCG_EXTRAS_HPP_INCLUDED diff --git a/eidos/pcg_random.hpp b/eidos/pcg_random.hpp new file mode 100644 index 000000000..d479a8151 --- /dev/null +++ b/eidos/pcg_random.hpp @@ -0,0 +1,1951 @@ +/* + * PCG Random Number Generation for C++ + * + * Copyright 2014-2022 Melissa O'Neill , + * and the PCG Project contributors. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + * + * Licensed under the Apache License, Version 2.0 (provided in + * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) + * or under the MIT license (provided in LICENSE-MIT.txt and at + * http://opensource.org/licenses/MIT), at your option. This file may not + * be copied, modified, or distributed except according to those terms. + * + * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either + * express or implied. See your chosen license for details. + * + * For additional information about the PCG random number generation scheme, + * visit http://www.pcg-random.org/. + */ + +/* + * This code provides the reference implementation of the PCG family of + * random number generators. 
The code is complex because it implements + * + * - several members of the PCG family, specifically members corresponding + * to the output functions: + * - XSH RR (good for 64-bit state, 32-bit output) + * - XSH RS (good for 64-bit state, 32-bit output) + * - XSL RR (good for 128-bit state, 64-bit output) + * - RXS M XS (statistically most powerful generator) + * - XSL RR RR (good for 128-bit state, 128-bit output) + * - and RXS, RXS M, XSH, XSL (mostly for testing) + * - at potentially *arbitrary* bit sizes + * - with four different techniques for random streams (MCG, one-stream + * LCG, settable-stream LCG, unique-stream LCG) + * - and the extended generation schemes allowing arbitrary periods + * - with all features of C++11 random number generation (and more), + * some of which are somewhat painful, including + * - initializing with a SeedSequence which writes 32-bit values + * to memory, even though the state of the generator may not + * use 32-bit values (it might use smaller or larger integers) + * - I/O for RNGs and a prescribed format, which needs to handle + * the issue that 8-bit and 128-bit integers don't have working + * I/O routines (e.g., normally 8-bit = char, not integer) + * - equality and inequality for RNGs + * - and a number of convenience typedefs to mask all the complexity + * + * The code employees a fairly heavy level of abstraction, and has to deal + * with various C++ minutia. If you're looking to learn about how the PCG + * scheme works, you're probably best of starting with one of the other + * codebases (see www.pcg-random.org). But if you're curious about the + * constants for the various output functions used in those other, simpler, + * codebases, this code shows how they are calculated. 
+ * + * On the positive side, at least there are convenience typedefs so that you + * can say + * + * pcg32 myRNG; + * + * rather than: + * + * pcg_detail::engine< + * uint32_t, // Output Type + * uint64_t, // State Type + * pcg_detail::xsh_rr_mixin, true, // Output Func + * pcg_detail::specific_stream, // Stream Kind + * pcg_detail::default_multiplier // LCG Mult + * > myRNG; + * + */ + +#ifndef PCG_RAND_HPP_INCLUDED +#define PCG_RAND_HPP_INCLUDED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _MSC_VER + #pragma warning(disable:4146) +#endif + +#ifdef _MSC_VER + #define PCG_ALWAYS_INLINE __forceinline +#elif __GNUC__ + #define PCG_ALWAYS_INLINE __attribute__((always_inline)) +#else + #define PCG_ALWAYS_INLINE inline +#endif + +/* + * The pcg_extras namespace contains some support code that is likely to + * be useful for a variety of RNGs, including: + * - 128-bit int support for platforms where it isn't available natively + * - bit twiddling operations + * - I/O of 128-bit and 8-bit integers + * - Handling the evilness of SeedSeq + * - Support for efficiently producing random numbers less than a given + * bound + */ + +#include "pcg_extras.hpp" + +namespace pcg_detail { + +using namespace pcg_extras; + +/* + * The LCG generators need some constants to function. This code lets you + * look up the constant by *type*. For example + * + * default_multiplier::multiplier() + * + * gives you the default multiplier for 32-bit integers. We use the name + * of the constant and not a generic word like value to allow these classes + * to be used as mixins. 
+ */ + +template +struct default_multiplier { + // Not defined for an arbitrary type +}; + +template +struct default_increment { + // Not defined for an arbitrary type +}; + +#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ + template <> \ + struct what ## _ ## kind { \ + static constexpr type kind() { \ + return constant; \ + } \ + }; + +PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) +PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) + +PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) +PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) + +PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) +PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) + +PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) +PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) + +PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, + PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL)) +PCG_DEFINE_CONSTANT(pcg128_t, default, increment, + PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL)) + +/* Alternative (cheaper) multipliers for 128-bit */ + +template +struct cheap_multiplier : public default_multiplier { + // For most types just use the default. 
+}; + +template <> +struct cheap_multiplier { + static constexpr uint64_t multiplier() { + return 0xda942042e4dd58b5ULL; + } +}; + + +/* + * Each PCG generator is available in four variants, based on how it applies + * the additive constant for its underlying LCG; the variations are: + * + * single stream - all instances use the same fixed constant, thus + * the RNG always somewhere in same sequence + * mcg - adds zero, resulting in a single stream and reduced + * period + * specific stream - the constant can be changed at any time, selecting + * a different random sequence + * unique stream - the constant is based on the memory address of the + * object, thus every RNG has its own unique sequence + * + * This variation is provided though mixin classes which define a function + * value called increment() that returns the necessary additive constant. + */ + + + +/* + * unique stream + */ + + +template +class unique_stream { +protected: + static constexpr bool is_mcg = false; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + +public: + typedef itype state_type; + + constexpr itype increment() const { + return itype(reinterpret_cast(this) | 1); + } + + constexpr itype stream() const + { + return increment() >> 1; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return (sizeof(itype) < sizeof(size_t) ? sizeof(itype) + : sizeof(size_t))*8 - 1u; + } + +protected: + constexpr unique_stream() = default; +}; + + +/* + * no stream (mcg) + */ + +template +class no_stream { +protected: + static constexpr bool is_mcg = true; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) 
+ { + abort(); + } + +public: + typedef itype state_type; + + static constexpr itype increment() { + return 0; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return 0u; + } + +protected: + constexpr no_stream() = default; +}; + + +/* + * single stream/sequence (oneseq) + */ + +template +class oneseq_stream : public default_increment { +protected: + static constexpr bool is_mcg = false; + + // Is never called, but is provided for symmetry with specific_stream + void set_stream(...) + { + abort(); + } + +public: + typedef itype state_type; + + static constexpr itype stream() + { + return default_increment::increment() >> 1; + } + + static constexpr bool can_specify_stream = false; + + static constexpr size_t streams_pow2() + { + return 0u; + } + +protected: + constexpr oneseq_stream() = default; +}; + + +/* + * specific stream + */ + +template +class specific_stream { +protected: + static constexpr bool is_mcg = false; + + itype inc_ = default_increment::increment(); + +public: + typedef itype state_type; + typedef itype stream_state; + + constexpr itype increment() const { + return inc_; + } + + itype stream() + { + return inc_ >> 1; + } + + void set_stream(itype specific_seq) + { + inc_ = (specific_seq << 1) | 1; + } + + static constexpr bool can_specify_stream = true; + + static constexpr size_t streams_pow2() + { + return (sizeof(itype)*8) - 1u; + } + +protected: + specific_stream() = default; + + specific_stream(itype specific_seq) + : inc_(itype(specific_seq << 1) | itype(1U)) + { + // Nothing (else) to do. + } +}; + + +/* + * This is where it all comes together. 
This function joins together three + * mixin classes which define + * - the LCG additive constant (the stream) + * - the LCG multiplier + * - the output function + * in addition, we specify the type of the LCG state, and the result type, + * and whether to use the pre-advance version of the state for the output + * (increasing instruction-level parallelism) or the post-advance version + * (reducing register pressure). + * + * Given the high level of parameterization, the code has to use some + * template-metaprogramming tricks to handle some of the subtle variations + * involved. + */ + +template , + typename multiplier_mixin = default_multiplier > +class engine : protected output_mixin, + public stream_mixin, + protected multiplier_mixin { +protected: + itype state_; + + struct can_specify_stream_tag {}; + struct no_specifiable_stream_tag {}; + + using stream_mixin::increment; + using multiplier_mixin::multiplier; + +public: + typedef xtype result_type; + typedef itype state_type; + + static constexpr size_t period_pow2() + { + return sizeof(state_type)*8 - 2*stream_mixin::is_mcg; + } + + // It would be nice to use std::numeric_limits for these, but + // we can't be sure that it'd be defined for the 128-bit types. 
+ + static constexpr result_type min() + { + return result_type(0UL); + } + + static constexpr result_type max() + { + return result_type(~result_type(0UL)); + } + +protected: + itype bump(itype state) + { + return state * multiplier() + increment(); + } + + itype base_generate() + { + return state_ = bump(state_); + } + + itype base_generate0() + { + itype old_state = state_; + state_ = bump(state_); + return old_state; + } + +public: + result_type operator()() + { + if (output_previous) + return this->output(base_generate0()); + else + return this->output(base_generate()); + } + + result_type operator()(result_type upper_bound) + { + return bounded_rand(*this, upper_bound); + } + +protected: + static itype advance(itype state, itype delta, + itype cur_mult, itype cur_plus); + + static itype distance(itype cur_state, itype newstate, itype cur_mult, + itype cur_plus, itype mask = ~itype(0U)); + + itype distance(itype newstate, itype mask = itype(~itype(0U))) const + { + return distance(state_, newstate, multiplier(), increment(), mask); + } + +public: + void advance(itype delta) + { + state_ = advance(state_, delta, this->multiplier(), this->increment()); + } + + void backstep(itype delta) + { + advance(-delta); + } + + void discard(itype delta) + { + advance(delta); + } + + bool wrapped() + { + if (stream_mixin::is_mcg) { + // For MCGs, the low order two bits never change. In this + // implementation, we keep them fixed at 3 to make this test + // easier. + return state_ == 3; + } else { + return state_ == 0; + } + } + + engine(itype state = itype(0xcafef00dd15ea5e5ULL)) + : state_(this->is_mcg ? state|state_type(3U) + : bump(state + this->increment())) + { + // Nothing else to do. + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + engine(itype state, typename sm::stream_state stream_seed) + : stream_mixin(stream_seed), + state_(this->is_mcg ? 
state|state_type(3U) + : bump(state + this->increment())) + { + // Nothing else to do. + } + + template + engine(SeedSeq&& seedSeq, typename std::enable_if< + !stream_mixin::can_specify_stream + && !std::is_convertible::value + && !std::is_convertible::value, + no_specifiable_stream_tag>::type = {}) + : engine(generate_one(std::forward(seedSeq))) + { + // Nothing else to do. + } + + template + engine(SeedSeq&& seedSeq, typename std::enable_if< + stream_mixin::can_specify_stream + && !std::is_convertible::value + && !std::is_convertible::value, + can_specify_stream_tag>::type = {}) + { + itype seeddata[2]; + generate_to<2>(std::forward(seedSeq), seeddata); + seed(seeddata[1], seeddata[0]); + } + + + template + void seed(Args&&... args) + { + new (this) engine(std::forward(args)...); + } + + template + friend bool operator==(const engine&, + const engine&); + + template + friend itype1 operator-(const engine&, + const engine&); + + template + friend std::basic_ostream& + operator<<(std::basic_ostream& out, + const engine&); + + template + friend std::basic_istream& + operator>>(std::basic_istream& in, + engine& rng); +}; + +template +std::basic_ostream& +operator<<(std::basic_ostream& out, + const engine& rng) +{ + using pcg_extras::operator<<; + + auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); + auto space = out.widen(' '); + auto orig_fill = out.fill(); + + out << rng.multiplier() << space + << rng.increment() << space + << rng.state_; + + out.flags(orig_flags); + out.fill(orig_fill); + return out; +} + + +template +std::basic_istream& +operator>>(std::basic_istream& in, + engine& rng) +{ + using pcg_extras::operator>>; + + auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + + itype multiplier, increment, state; + in >> multiplier >> increment >> state; + + if (!in.fail()) { + bool good = true; + if (multiplier != rng.multiplier()) { + good = false; + } else if (rng.can_specify_stream) { + rng.set_stream(increment >> 
1); + } else if (increment != rng.increment()) { + good = false; + } + if (good) { + rng.state_ = state; + } else { + in.clear(std::ios::failbit); + } + } + + in.flags(orig_flags); + return in; +} + + +template +itype engine::advance( + itype state, itype delta, itype cur_mult, itype cur_plus) +{ + // The method used here is based on Brown, "Random Number Generation + // with Arbitrary Stride,", Transactions of the American Nuclear + // Society (Nov. 1994). The algorithm is very similar to fast + // exponentiation. + // + // Even though delta is an unsigned integer, we can pass a + // signed integer to go backwards, it just goes "the long way round". + + constexpr itype ZERO = 0u; // itype may be a non-trivial types, so + constexpr itype ONE = 1u; // we define some ugly constants. + itype acc_mult = 1; + itype acc_plus = 0; + while (delta > ZERO) { + if (delta & ONE) { + acc_mult *= cur_mult; + acc_plus = acc_plus*cur_mult + cur_plus; + } + cur_plus = (cur_mult+ONE)*cur_plus; + cur_mult *= cur_mult; + delta >>= 1; + } + return acc_mult * state + acc_plus; +} + +template +itype engine::distance( + itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) +{ + constexpr itype ONE = 1u; // itype could be weird, so use constant + bool is_mcg = cur_plus == itype(0); + itype the_bit = is_mcg ? itype(4u) : itype(1u); + itype distance = 0u; + while ((cur_state & mask) != (newstate & mask)) { + if ((cur_state & the_bit) != (newstate & the_bit)) { + cur_state = cur_state * cur_mult + cur_plus; + distance |= the_bit; + } + assert((cur_state & the_bit) == (newstate & the_bit)); + the_bit <<= 1; + cur_plus = (cur_mult+ONE)*cur_plus; + cur_mult *= cur_mult; + } + return is_mcg ? 
distance >> 2 : distance; +} + +template +itype operator-(const engine& lhs, + const engine& rhs) +{ + static_assert( + std::is_same::value && + std::is_same::value, + "Incomparable generators"); + if (lhs.increment() == rhs.increment()) { + return rhs.distance(lhs.state_); + } else { + constexpr itype ONE = 1u; + itype lhs_diff = lhs.increment() + (lhs.multiplier()-ONE) * lhs.state_; + itype rhs_diff = rhs.increment() + (rhs.multiplier()-ONE) * rhs.state_; + if ((lhs_diff & itype(3u)) != (rhs_diff & itype(3u))) { + rhs_diff = -rhs_diff; + } + return rhs.distance(rhs_diff, lhs_diff, rhs.multiplier(), itype(0u)); + } +} + + +template +bool operator==(const engine& lhs, + const engine& rhs) +{ + return (lhs.multiplier() == rhs.multiplier()) + && (lhs.increment() == rhs.increment()) + && (lhs.state_ == rhs.state_); +} + +template +inline bool operator!=(const engine& lhs, + const engine& rhs) +{ + return !operator==(lhs,rhs); +} + + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> +using oneseq_base = engine, output_previous, + oneseq_stream, + multiplier_mixin >; + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> +using unique_base = engine, output_previous, + unique_stream, + multiplier_mixin >; + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> +using setseq_base = engine, output_previous, + specific_stream, + multiplier_mixin >; + +template class output_mixin, + bool output_previous = (sizeof(itype) <= 8), + template class multiplier_mixin = default_multiplier> +using mcg_base = engine, output_previous, + no_stream, + multiplier_mixin >; + +/* + * OUTPUT FUNCTIONS. + * + * These are the core of the PCG generation scheme. 
They specify how to + * turn the base LCG's internal state into the output value of the final + * generator. + * + * They're implemented as mixin classes. + * + * All of the classes have code that is written to allow it to be applied + * at *arbitrary* bit sizes, although in practice they'll only be used at + * standard sizes supported by C++. + */ + +/* + * XSH RS -- high xorshift, followed by a random shift + * + * Fast. A good performer. + */ + +template +struct xsh_rs_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t opbits = + sparebits-5 >= 64 ? 5 + : sparebits-4 >= 32 ? 4 + : sparebits-3 >= 16 ? 3 + : sparebits-2 >= 4 ? 2 + : sparebits-1 >= 1 ? 1 + : 0; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t maxrandshift = mask; + constexpr bitcount_t topspare = opbits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/2; + bitcount_t rshift = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> xshift; + xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); + return result; + } +}; + +/* + * XSH RR -- high xorshift, followed by a random rotate + * + * Fast. A good performer. Slightly better statistically than XSH RS. + */ + +template +struct xsh_rr_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t wantedopbits = + xtypebits >= 128 ? 7 + : xtypebits >= 64 ? 6 + : xtypebits >= 32 ? 5 + : xtypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = + sparebits >= wantedopbits ? 
wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = opbits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits)/2; + bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask + : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + xtype result = xtype(internal >> bottomspare); + result = rotr(result, amprot); + return result; + } +}; + +/* + * RXS -- random xorshift + */ + +template +struct rxs_mixin { +static xtype output_rxs(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t extrashift = (xtypebits - shift)/2; + bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63 + : shift > 32+4 ? (internal >> (bits - 5)) & 31 + : shift > 16+2 ? (internal >> (bits - 4)) & 15 + : shift > 8+1 ? (internal >> (bits - 3)) & 7 + : shift > 4+1 ? (internal >> (bits - 2)) & 3 + : shift > 2+1 ? (internal >> (bits - 1)) & 1 + : 0; + internal ^= internal >> (shift + extrashift - rshift); + xtype result = internal >> rshift; + return result; + } +}; + +/* + * RXS M XS -- random xorshift, mcg multiply, fixed xorshift + * + * The most statistically powerful generator, but all those steps + * make it slower than some of the others. We give it the rottenest jobs. + * + * Because it's usually used in contexts where the state type and the + * result type are the same, it is a permutation and is thus invertable. + * We thus provide a function to invert it. This function is used to + * for the "inside out" generator used by the extended generator. + */ + +/* Defined type-based concepts for the multiplication step. 
They're actually + * all derived by truncating the 128-bit, which was computed to be a good + * "universal" constant. + */ + +template +struct mcg_multiplier { + // Not defined for an arbitrary type +}; + +template +struct mcg_unmultiplier { + // Not defined for an arbitrary type +}; + +PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) +PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) + +PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) +PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) + +PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) +PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) + +PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) +PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) + +PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, + PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) +PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, + PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) + + +template +struct rxs_m_xs_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = xtypebits >= 128 ? 6 + : xtypebits >= 64 ? 5 + : xtypebits >= 32 ? 4 + : xtypebits >= 16 ? 3 + : 2; + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t mask = (1 << opbits) - 1; + bitcount_t rshift = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> (opbits + rshift); + internal *= mcg_multiplier::multiplier(); + xtype result = internal >> shift; + result ^= result >> ((2U*xtypebits+2U)/3U); + return result; + } + + static itype unoutput(itype internal) + { + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = bits >= 128 ? 6 + : bits >= 64 ? 5 + : bits >= 32 ? 4 + : bits >= 16 ? 
3 + : 2; + constexpr bitcount_t mask = (1 << opbits) - 1; + + internal = unxorshift(internal, bits, (2U*bits+2U)/3U); + + internal *= mcg_unmultiplier::unmultiplier(); + + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + internal = unxorshift(internal, bits, opbits + rshift); + + return internal; + } +}; + + +/* + * RXS M -- random xorshift, mcg multiply + */ + +template +struct rxs_m_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t opbits = xtypebits >= 128 ? 6 + : xtypebits >= 64 ? 5 + : xtypebits >= 32 ? 4 + : xtypebits >= 16 ? 3 + : 2; + constexpr bitcount_t shift = bits - xtypebits; + constexpr bitcount_t mask = (1 << opbits) - 1; + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + internal ^= internal >> (opbits + rshift); + internal *= mcg_multiplier::multiplier(); + xtype result = internal >> shift; + return result; + } +}; + + +/* + * DXSM -- double xorshift multiply + * + * This is a new, more powerful output permutation (added in 2019). It's + * a more comprehensive scrambling than RXS M, but runs faster on 128-bit + * types. Although primarily intended for use at large sizes, also works + * at smaller sizes as well. + * + * This permutation is similar to xorshift multiply hash functions, except + * that one of the multipliers is the LCG multiplier (to avoid needing to + * have a second constant) and the other is based on the low-order bits. + * This latter aspect means that the scrambling applied to the high bits + * depends on the low bits, and makes it (to my eye) impractical to back + * out the permutation without having the low-order bits. 
+ */ + +template +struct dxsm_mixin { + inline xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t itypebits = bitcount_t(sizeof(itype) * 8); + static_assert(xtypebits <= itypebits/2, + "Output type must be half the size of the state type."); + + xtype hi = xtype(internal >> (itypebits - xtypebits)); + xtype lo = xtype(internal); + + lo |= 1; + hi ^= hi >> (xtypebits/2); + hi *= xtype(cheap_multiplier::multiplier()); + hi ^= hi >> (3*(xtypebits/4)); + hi *= lo; + return hi; + } +}; + + +/* + * XSL RR -- fixed xorshift (to low bits), random rotate + * + * Useful for 128-bit types that are split across two CPU registers. + */ + +template +struct xsl_rr_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 + : xtypebits >= 64 ? 6 + : xtypebits >= 32 ? 5 + : xtypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + bitcount_t rot = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + xtype result = xtype(internal >> bottomspare); + result = rotr(result, amprot); + return result; + } +}; + + +/* + * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) + * + * Useful for 128-bit types that are split across two CPU registers. + * If you really want an invertable 128-bit RNG, I guess this is the one. 
+ */ + +template struct halfsize_trait {}; +template <> struct halfsize_trait { typedef uint64_t type; }; +template <> struct halfsize_trait { typedef uint32_t type; }; +template <> struct halfsize_trait { typedef uint16_t type; }; +template <> struct halfsize_trait { typedef uint8_t type; }; + +template +struct xsl_rr_rr_mixin { + typedef typename halfsize_trait::type htype; + + static itype output(itype internal) + { + constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - htypebits; + constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 + : htypebits >= 64 ? 6 + : htypebits >= 32 ? 5 + : htypebits >= 16 ? 4 + : 3; + constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits + : sparebits; + constexpr bitcount_t amplifier = wantedopbits - opbits; + constexpr bitcount_t mask = (1 << opbits) - 1; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t xshift = (topspare + htypebits) / 2; + + bitcount_t rot = + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + bitcount_t amprot = (rot << amplifier) & mask; + internal ^= internal >> xshift; + htype lowbits = htype(internal); + lowbits = rotr(lowbits, amprot); + htype highbits = htype(internal >> topspare); + bitcount_t rot2 = lowbits & mask; + bitcount_t amprot2 = (rot2 << amplifier) & mask; + highbits = rotr(highbits, amprot2); + return (itype(highbits) << topspare) ^ itype(lowbits); + } +}; + + +/* + * XSH -- fixed xorshift (to high bits) + * + * You shouldn't use this at 64-bits or less. 
+ */ + +template +struct xsh_mixin { + static xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t topspare = 0; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + internal ^= internal >> xshift; + xtype result = internal >> bottomspare; + return result; + } +}; + +/* + * XSL -- fixed xorshift (to low bits) + * + * You shouldn't use this at 64-bits or less. + */ + +template +struct xsl_mixin { + inline xtype output(itype internal) + { + constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); + constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); + constexpr bitcount_t sparebits = bits - xtypebits; + constexpr bitcount_t topspare = sparebits; + constexpr bitcount_t bottomspare = sparebits - topspare; + constexpr bitcount_t xshift = (topspare + xtypebits) / 2; + + internal ^= internal >> xshift; + xtype result = internal >> bottomspare; + return result; + } +}; + + +/* ---- End of Output Functions ---- */ + + +template +struct inside_out : private baseclass { + inside_out() = delete; + + typedef typename baseclass::result_type result_type; + typedef typename baseclass::state_type state_type; + static_assert(sizeof(result_type) == sizeof(state_type), + "Require a RNG whose output function is a permutation"); + + static bool external_step(result_type& randval, size_t i) + { + state_type state = baseclass::unoutput(randval); + state = state * baseclass::multiplier() + baseclass::increment() + + state_type(i*2); + result_type result = baseclass::output(state); + randval = result; + state_type zero = + baseclass::is_mcg ? 
state & state_type(3U) : state_type(0U); + return result == zero; + } + + static bool external_advance(result_type& randval, size_t i, + result_type delta, bool forwards = true) + { + state_type state = baseclass::unoutput(randval); + state_type mult = baseclass::multiplier(); + state_type inc = baseclass::increment() + state_type(i*2); + state_type zero = + baseclass::is_mcg ? state & state_type(3U) : state_type(0U); + state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); + bool crosses_zero = + forwards ? dist_to_zero <= delta + : (-dist_to_zero) <= delta; + if (!forwards) + delta = -delta; + state = baseclass::advance(state, delta, mult, inc); + randval = baseclass::output(state); + return crosses_zero; + } +}; + + +template +class extended : public baseclass { +public: + typedef typename baseclass::state_type state_type; + typedef typename baseclass::result_type result_type; + typedef inside_out insideout; + +private: + static constexpr bitcount_t rtypebits = sizeof(result_type)*8; + static constexpr bitcount_t stypebits = sizeof(state_type)*8; + + static constexpr bitcount_t tick_limit_pow2 = 64U; + + static constexpr size_t table_size = 1UL << table_pow2; + static constexpr size_t table_shift = stypebits - table_pow2; + static constexpr state_type table_mask = + (state_type(1U) << table_pow2) - state_type(1U); + + static constexpr bool may_tick = + (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); + static constexpr size_t tick_shift = stypebits - advance_pow2; + static constexpr state_type tick_mask = + may_tick ? 
state_type( + (uint64_t(1) << (advance_pow2*may_tick)) - 1) + // ^-- stupidity to appease GCC warnings + : ~state_type(0U); + + static constexpr bool may_tock = stypebits < tick_limit_pow2; + + result_type data_[table_size]; + + PCG_NOINLINE void advance_table(); + + PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); + + result_type& get_extended_value() + { + state_type state = this->state_; + if (kdd && baseclass::is_mcg) { + // The low order bits of an MCG are constant, so drop them. + state >>= 2; + } + size_t index = kdd ? state & table_mask + : state >> table_shift; + + if (may_tick) { + bool tick = kdd ? (state & tick_mask) == state_type(0u) + : (state >> tick_shift) == state_type(0u); + if (tick) + advance_table(); + } + if (may_tock) { + bool tock = state == state_type(0u); + if (tock) + advance_table(); + } + return data_[index]; + } + +public: + static constexpr size_t period_pow2() + { + return baseclass::period_pow2() + table_size*extvalclass::period_pow2(); + } + + PCG_ALWAYS_INLINE result_type operator()() + { + result_type rhs = get_extended_value(); + result_type lhs = this->baseclass::operator()(); + return lhs ^ rhs; + } + + result_type operator()(result_type upper_bound) + { + return bounded_rand(*this, upper_bound); + } + + void set(result_type wanted) + { + result_type& rhs = get_extended_value(); + result_type lhs = this->baseclass::operator()(); + rhs = lhs ^ wanted; + } + + void advance(state_type distance, bool forwards = true); + + void backstep(state_type distance) + { + advance(distance, false); + } + + extended(const result_type* data) + : baseclass() + { + datainit(data); + } + + extended(const result_type* data, state_type seed) + : baseclass(seed) + { + datainit(data); + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. 
+ + template + extended(const result_type* data, state_type seed, + typename bc::stream_state stream_seed) + : baseclass(seed, stream_seed) + { + datainit(data); + } + + extended() + : baseclass() + { + selfinit(); + } + + extended(state_type seed) + : baseclass(seed) + { + selfinit(); + } + + // This function may or may not exist. It thus has to be a template + // to use SFINAE; users don't have to worry about its template-ness. + + template + extended(state_type seed, typename bc::stream_state stream_seed) + : baseclass(seed, stream_seed) + { + selfinit(); + } + +private: + void selfinit(); + void datainit(const result_type* data); + +public: + + template::value + && !std::is_convertible::value>::type> + extended(SeedSeq&& seedSeq) + : baseclass(seedSeq) + { + generate_to(seedSeq, data_); + } + + template + void seed(Args&&... args) + { + new (this) extended(std::forward(args)...); + } + + template + friend bool operator==(const extended&, + const extended&); + + template + friend std::basic_ostream& + operator<<(std::basic_ostream& out, + const extended&); + + template + friend std::basic_istream& + operator>>(std::basic_istream& in, + extended&); + +}; + + +template +void extended::datainit( + const result_type* data) +{ + for (size_t i = 0; i < table_size; ++i) + data_[i] = data[i]; +} + +template +void extended::selfinit() +{ + // We need to fill the extended table with something, and we have + // very little provided data, so we use the base generator to + // produce values. Although not ideal (use a seed sequence, folks!), + // unexpected correlations are mitigated by + // - using XOR differences rather than the number directly + // - the way the table is accessed, its values *won't* be accessed + // in the same order the were written. 
+ // - any strange correlations would only be apparent if we + // were to backstep the generator so that the base generator + // was generating the same values again + result_type lhs = baseclass::operator()(); + result_type rhs = baseclass::operator()(); + result_type xdiff = lhs - rhs; + for (size_t i = 0; i < table_size; ++i) { + data_[i] = baseclass::operator()() ^ xdiff; + } +} + +template +bool operator==(const extended& lhs, + const extended& rhs) +{ + auto& base_lhs = static_cast(lhs); + auto& base_rhs = static_cast(rhs); + return base_lhs == base_rhs + && std::equal( + std::begin(lhs.data_), std::end(lhs.data_), + std::begin(rhs.data_) + ); +} + +template +inline bool operator!=(const extended& lhs, + const extended& rhs) +{ + return !operator==(lhs, rhs); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& out, + const extended& rng) +{ + using pcg_extras::operator<<; + + auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); + auto space = out.widen(' '); + auto orig_fill = out.fill(); + + out << rng.multiplier() << space + << rng.increment() << space + << rng.state_; + + for (const auto& datum : rng.data_) + out << space << datum; + + out.flags(orig_flags); + out.fill(orig_fill); + return out; +} + +template +std::basic_istream& +operator>>(std::basic_istream& in, + extended& rng) +{ + extended new_rng; + auto& base_rng = static_cast(new_rng); + in >> base_rng; + + if (in.fail()) + return in; + + using pcg_extras::operator>>; + + auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); + + for (auto& datum : new_rng.data_) { + in >> datum; + if (in.fail()) + goto bail; + } + + rng = new_rng; + +bail: + in.flags(orig_flags); + return in; +} + + + +template +void +extended::advance_table() +{ + bool carry = false; + for (size_t i = 0; i < table_size; ++i) { + if (carry) { + carry = insideout::external_step(data_[i],i+1); + } + bool carry2 = insideout::external_step(data_[i],i+1); + carry = carry || carry2; + 
} +} + +template +void +extended::advance_table( + state_type delta, bool isForwards) +{ + typedef typename baseclass::state_type base_state_t; + typedef typename extvalclass::state_type ext_state_t; + constexpr bitcount_t basebits = sizeof(base_state_t)*8; + constexpr bitcount_t extbits = sizeof(ext_state_t)*8; + static_assert(basebits <= extbits || advance_pow2 > 0, + "Current implementation might overflow its carry"); + + base_state_t carry = 0; + for (size_t i = 0; i < table_size; ++i) { + base_state_t total_delta = carry + delta; + ext_state_t trunc_delta = ext_state_t(total_delta); + if (basebits > extbits) { + carry = total_delta >> extbits; + } else { + carry = 0; + } + carry += + insideout::external_advance(data_[i],i+1, trunc_delta, isForwards); + } +} + +template +void extended::advance( + state_type distance, bool forwards) +{ + static_assert(kdd, + "Efficient advance is too hard for non-kdd extension. " + "For a weak advance, cast to base class"); + state_type zero = + baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U); + if (may_tick) { + state_type ticks = distance >> (advance_pow2*may_tick); + // ^-- stupidity to appease GCC + // warnings + state_type adv_mask = + baseclass::is_mcg ? 
tick_mask << 2 : tick_mask; + state_type next_advance_distance = this->distance(zero, adv_mask); + if (!forwards) + next_advance_distance = (-next_advance_distance) & tick_mask; + if (next_advance_distance < (distance & tick_mask)) { + ++ticks; + } + if (ticks) + advance_table(ticks, forwards); + } + if (forwards) { + if (may_tock && this->distance(zero) <= distance) + advance_table(); + baseclass::advance(distance); + } else { + if (may_tock && -(this->distance(zero)) <= distance) + advance_table(state_type(1U), false); + baseclass::advance(-distance); + } +} + +} // namespace pcg_detail + +namespace pcg_engines { + +using namespace pcg_detail; + +/* Predefined types for XSH RS */ + +typedef oneseq_base oneseq_xsh_rs_16_8; +typedef oneseq_base oneseq_xsh_rs_32_16; +typedef oneseq_base oneseq_xsh_rs_64_32; +typedef oneseq_base oneseq_xsh_rs_128_64; +typedef oneseq_base + cm_oneseq_xsh_rs_128_64; + +typedef unique_base unique_xsh_rs_16_8; +typedef unique_base unique_xsh_rs_32_16; +typedef unique_base unique_xsh_rs_64_32; +typedef unique_base unique_xsh_rs_128_64; +typedef unique_base + cm_unique_xsh_rs_128_64; + +typedef setseq_base setseq_xsh_rs_16_8; +typedef setseq_base setseq_xsh_rs_32_16; +typedef setseq_base setseq_xsh_rs_64_32; +typedef setseq_base setseq_xsh_rs_128_64; +typedef setseq_base + cm_setseq_xsh_rs_128_64; + +typedef mcg_base mcg_xsh_rs_16_8; +typedef mcg_base mcg_xsh_rs_32_16; +typedef mcg_base mcg_xsh_rs_64_32; +typedef mcg_base mcg_xsh_rs_128_64; +typedef mcg_base + cm_mcg_xsh_rs_128_64; + +/* Predefined types for XSH RR */ + +typedef oneseq_base oneseq_xsh_rr_16_8; +typedef oneseq_base oneseq_xsh_rr_32_16; +typedef oneseq_base oneseq_xsh_rr_64_32; +typedef oneseq_base oneseq_xsh_rr_128_64; +typedef oneseq_base + cm_oneseq_xsh_rr_128_64; + +typedef unique_base unique_xsh_rr_16_8; +typedef unique_base unique_xsh_rr_32_16; +typedef unique_base unique_xsh_rr_64_32; +typedef unique_base unique_xsh_rr_128_64; +typedef unique_base + 
cm_unique_xsh_rr_128_64; + +typedef setseq_base setseq_xsh_rr_16_8; +typedef setseq_base setseq_xsh_rr_32_16; +typedef setseq_base setseq_xsh_rr_64_32; +typedef setseq_base setseq_xsh_rr_128_64; +typedef setseq_base + cm_setseq_xsh_rr_128_64; + +typedef mcg_base mcg_xsh_rr_16_8; +typedef mcg_base mcg_xsh_rr_32_16; +typedef mcg_base mcg_xsh_rr_64_32; +typedef mcg_base mcg_xsh_rr_128_64; +typedef mcg_base + cm_mcg_xsh_rr_128_64; + + +/* Predefined types for RXS M XS */ + +typedef oneseq_base oneseq_rxs_m_xs_8_8; +typedef oneseq_base oneseq_rxs_m_xs_16_16; +typedef oneseq_base oneseq_rxs_m_xs_32_32; +typedef oneseq_base oneseq_rxs_m_xs_64_64; +typedef oneseq_base + oneseq_rxs_m_xs_128_128; +typedef oneseq_base + cm_oneseq_rxs_m_xs_128_128; + +typedef unique_base unique_rxs_m_xs_8_8; +typedef unique_base unique_rxs_m_xs_16_16; +typedef unique_base unique_rxs_m_xs_32_32; +typedef unique_base unique_rxs_m_xs_64_64; +typedef unique_base unique_rxs_m_xs_128_128; +typedef unique_base + cm_unique_rxs_m_xs_128_128; + +typedef setseq_base setseq_rxs_m_xs_8_8; +typedef setseq_base setseq_rxs_m_xs_16_16; +typedef setseq_base setseq_rxs_m_xs_32_32; +typedef setseq_base setseq_rxs_m_xs_64_64; +typedef setseq_base setseq_rxs_m_xs_128_128; +typedef setseq_base + cm_setseq_rxs_m_xs_128_128; + + // MCG versions don't make sense here, so aren't defined. 
+ +/* Predefined types for RXS M */ + +typedef oneseq_base oneseq_rxs_m_16_8; +typedef oneseq_base oneseq_rxs_m_32_16; +typedef oneseq_base oneseq_rxs_m_64_32; +typedef oneseq_base oneseq_rxs_m_128_64; +typedef oneseq_base + cm_oneseq_rxs_m_128_64; + +typedef unique_base unique_rxs_m_16_8; +typedef unique_base unique_rxs_m_32_16; +typedef unique_base unique_rxs_m_64_32; +typedef unique_base unique_rxs_m_128_64; +typedef unique_base + cm_unique_rxs_m_128_64; + +typedef setseq_base setseq_rxs_m_16_8; +typedef setseq_base setseq_rxs_m_32_16; +typedef setseq_base setseq_rxs_m_64_32; +typedef setseq_base setseq_rxs_m_128_64; +typedef setseq_base + cm_setseq_rxs_m_128_64; + +typedef mcg_base mcg_rxs_m_16_8; +typedef mcg_base mcg_rxs_m_32_16; +typedef mcg_base mcg_rxs_m_64_32; +typedef mcg_base mcg_rxs_m_128_64; +typedef mcg_base + cm_mcg_rxs_m_128_64; + +/* Predefined types for DXSM */ + +typedef oneseq_base oneseq_dxsm_16_8; +typedef oneseq_base oneseq_dxsm_32_16; +typedef oneseq_base oneseq_dxsm_64_32; +typedef oneseq_base oneseq_dxsm_128_64; +typedef oneseq_base + cm_oneseq_dxsm_128_64; + +typedef unique_base unique_dxsm_16_8; +typedef unique_base unique_dxsm_32_16; +typedef unique_base unique_dxsm_64_32; +typedef unique_base unique_dxsm_128_64; +typedef unique_base + cm_unique_dxsm_128_64; + +typedef setseq_base setseq_dxsm_16_8; +typedef setseq_base setseq_dxsm_32_16; +typedef setseq_base setseq_dxsm_64_32; +typedef setseq_base setseq_dxsm_128_64; +typedef setseq_base + cm_setseq_dxsm_128_64; + +typedef mcg_base mcg_dxsm_16_8; +typedef mcg_base mcg_dxsm_32_16; +typedef mcg_base mcg_dxsm_64_32; +typedef mcg_base mcg_dxsm_128_64; +typedef mcg_base + cm_mcg_dxsm_128_64; + +/* Predefined types for XSL RR (only defined for "large" types) */ + +typedef oneseq_base oneseq_xsl_rr_64_32; +typedef oneseq_base oneseq_xsl_rr_128_64; +typedef oneseq_base + cm_oneseq_xsl_rr_128_64; + +typedef unique_base unique_xsl_rr_64_32; +typedef unique_base unique_xsl_rr_128_64; +typedef 
unique_base + cm_unique_xsl_rr_128_64; + +typedef setseq_base setseq_xsl_rr_64_32; +typedef setseq_base setseq_xsl_rr_128_64; +typedef setseq_base + cm_setseq_xsl_rr_128_64; + +typedef mcg_base mcg_xsl_rr_64_32; +typedef mcg_base mcg_xsl_rr_128_64; +typedef mcg_base + cm_mcg_xsl_rr_128_64; + + +/* Predefined types for XSL RR RR (only defined for "large" types) */ + +typedef oneseq_base + oneseq_xsl_rr_rr_64_64; +typedef oneseq_base + oneseq_xsl_rr_rr_128_128; +typedef oneseq_base + cm_oneseq_xsl_rr_rr_128_128; + +typedef unique_base + unique_xsl_rr_rr_64_64; +typedef unique_base + unique_xsl_rr_rr_128_128; +typedef unique_base + cm_unique_xsl_rr_rr_128_128; + +typedef setseq_base + setseq_xsl_rr_rr_64_64; +typedef setseq_base + setseq_xsl_rr_rr_128_128; +typedef setseq_base + cm_setseq_xsl_rr_rr_128_128; + + // MCG versions don't make sense here, so aren't defined. + +/* Extended generators */ + +template +using ext_std8 = extended; + +template +using ext_std16 = extended; + +template +using ext_std32 = extended; + +template +using ext_std64 = extended; + + +template +using ext_oneseq_rxs_m_xs_32_32 = + ext_std32; + +template +using ext_mcg_xsh_rs_64_32 = + ext_std32; + +template +using ext_oneseq_xsh_rs_64_32 = + ext_std32; + +template +using ext_setseq_xsh_rr_64_32 = + ext_std32; + +template +using ext_mcg_xsl_rr_128_64 = + ext_std64; + +template +using ext_oneseq_xsl_rr_128_64 = + ext_std64; + +template +using ext_setseq_xsl_rr_128_64 = + ext_std64; + +} // namespace pcg_engines + +typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; +typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; +typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; +typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast; + +typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; +typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; +typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; +typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; + +typedef pcg_engines::setseq_rxs_m_xs_8_8 
pcg8_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; +typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; +typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; + +typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; +typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; +typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; + + +// These two extended RNGs provide two-dimensionally equidistributed +// 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, +// and can be called twice to generate 64 bits, but does not require +// 128-bit math; on 32-bit systems, it's faster than pcg64 as well. + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<1,16,true> pcg32_k2; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<1,32,true> pcg32_k2_fast; + +// These twelve extended RNGs have about as much state as arc4random +// +// - the k variants are k-dimensionally equidistributed +// - the c variants are intended to be harder to predict +// +// (neither is intended for use in cryptographic applications) + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k64; +typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,true> pcg32_k64_oneseq; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k64_fast; + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,false> pcg32_c64; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,false> pcg32_c64_oneseq; +typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,false> pcg32_c64_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,true> pcg64_k32; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,true> pcg64_k32_oneseq; +typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,true> pcg64_k32_fast; +
+typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,false> pcg64_c32; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,false> pcg64_c32_oneseq; +typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,false> pcg64_c32_fast; + +// These eight extended RNGs have more state than the Mersenne twister +// +// - the k variants are k-dimensionally equidistributed +// - the c variants are intended to be harder to predict +// +// (neither is intended for use in cryptographic applications) + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,true> pcg32_k1024; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,true> pcg32_k1024_fast; + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,false> pcg32_c1024; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,false> pcg32_c1024_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,true> pcg64_k1024; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,true> pcg64_k1024_fast; + +typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,false> pcg64_c1024; +typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,false> pcg64_c1024_fast; + +// These generators have an insanely huge period (2^524352), and are suitable +// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary +// point in the future. [Actually, over the full period of the generator, it +// will produce every 64 KB ZIP file 2^64 times!] + +typedef pcg_engines::ext_setseq_xsh_rr_64_32<14,16,true> pcg32_k16384; +typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast; + +#ifdef _MSC_VER + #pragma warning(default:4146) +#endif + +#endif // PCG_RAND_HPP_INCLUDED