-
Notifications
You must be signed in to change notification settings - Fork 8
Future custom rng #98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This is for usage together with the new RNG API in future.
For now only for Xoshiro256++. For usage with the new RNG API in future.
These are (for now) meant for usage together with the new RNG API in future.
|
Initial attempt at supporting the RNG API in library(future)
future:::parallel_rng_kind(
kind = "L'Ecuyer-CMRG",
set_kind = base::RNGkind,
next_stream = parallel::nextRNGStream,
next_substream = parallel::nextRNGSubStream,
is_seed = future:::is_lecyer_cmrg_seed,
as_seed = future:::as_lecyer_cmrg_seed
)
#> $kind
#> [1] "L'Ecuyer-CMRG"
#>
#> $set_kind
#> function (kind = NULL, normal.kind = NULL, sample.kind = NULL)
#> {
#> kinds <- c("Wichmann-Hill", "Marsaglia-Multicarry", "Super-Duper",
#> "Mersenne-Twister", "Knuth-TAOCP", "user-supplied", "Knuth-TAOCP-2002",
#> "L'Ecuyer-CMRG", "default")
#> n.kinds <- c("Buggy Kinderman-Ramage", "Ahrens-Dieter", "Box-Muller",
#> "user-supplied", "Inversion", "Kinderman-Ramage", "default")
#> s.kinds <- c("Rounding", "Rejection", "default")
#> do.set <- length(kind) > 0L
#> if (do.set) {
#> if (!is.character(kind) || length(kind) > 1L)
#> stop("'kind' must be a character string (RNG to be used).")
#> if (is.na(i.knd <- pmatch(kind, kinds) - 1L))
#> stop(gettextf("'%s' is not a valid abbreviation of an RNG",
#> kind), domain = NA)
#> if (i.knd == length(kinds) - 1L)
#> i.knd <- -1L
#> }
#> else i.knd <- NULL
#> if (!is.null(normal.kind)) {
#> if (!is.character(normal.kind) || length(normal.kind) !=
#> 1L)
#> stop(gettextf("'%s' must be a character string",
#> "normal.kind"), domain = NA)
#> normal.kind <- pmatch(normal.kind, n.kinds) - 1L
#> if (is.na(normal.kind))
#> stop(gettextf("'%s' is not a valid choice", normal.kind),
#> domain = NA)
#> if (normal.kind == 0L)
#> warning("buggy version of Kinderman-Ramage generator used",
#> domain = NA)
#> if (normal.kind == length(n.kinds) - 1L)
#> normal.kind <- -1L
#> }
#> if (!is.null(sample.kind)) {
#> if (!is.character(sample.kind) || length(sample.kind) !=
#> 1L)
#> stop(gettextf("'%s' must be a character string",
#> "sample.kind"), domain = NA)
#> sample.kind <- pmatch(sample.kind, s.kinds) - 1L
#> if (is.na(sample.kind))
#> stop(gettextf("'%s' is not a valid choice", sample.kind),
#> domain = NA)
#> if (sample.kind == 0L)
#> warning("non-uniform 'Rounding' sampler used", domain = NA)
#> if (sample.kind == length(s.kinds) - 1L)
#> sample.kind <- -1L
#> }
#> r <- 1L + .Internal(RNGkind(i.knd, normal.kind, sample.kind))
#> r <- c(kinds[r[1L]], n.kinds[r[2L]], s.kinds[r[3L]])
#> if (do.set || !is.null(normal.kind) || !is.null(sample.kind))
#> invisible(r)
#> else r
#> }
#> <bytecode: 0x58da23eee168>
#> <environment: namespace:base>
#>
#> $next_stream
#> function (seed)
#> {
#> if (!is.integer(seed) || seed[1L]%%100L != 7L)
#> stop(gettextf("invalid value of %s", "'seed'"), domain = NA)
#> .Call(C_nextStream, seed)
#> }
#> <bytecode: 0x58da24e7e500>
#> <environment: namespace:parallel>
#>
#> $next_substream
#> function (seed)
#> {
#> if (!is.integer(seed) || seed[1L]%%100L != 7L)
#> stop(gettextf("invalid value of %s", "'seed'"), domain = NA)
#> .Call(C_nextSubStream, seed)
#> }
#> <bytecode: 0x58da24e83140>
#> <environment: namespace:parallel>
#>
#> $is_seed
#> function (seed)
#> {
#> is.numeric(seed) && length(seed) == 7L && all(is.finite(seed)) &&
#> (seed[1]%%10000L == 407L)
#> }
#> <bytecode: 0x58da24e821f0>
#> <environment: namespace:future>
#>
#> $as_seed
#> function (seed)
#> {
#> if (is.logical(seed)) {
#> stop_if_not(length(seed) == 1L)
#> if (!is.na(seed) && !seed) {
#> stopf("Argument 'seed' must be TRUE if logical: %s",
#> seed)
#> }
#> oseed <- get_random_seed()
#> if (!is.na(seed) && seed) {
#> if (is_lecyer_cmrg_seed(oseed))
#> return(oseed)
#> }
#> okind <- RNGkind("L'Ecuyer-CMRG")[1]
#> on.exit(set_random_seed(oseed, kind = okind, set_kind = RNGkind),
#> add = TRUE)
#> return(get_random_seed())
#> }
#> stop_if_not(is.numeric(seed), all(is.finite(seed)))
#> seed <- as.integer(seed)
#> if (is_lecyer_cmrg_seed(seed)) {
#> return(seed)
#> }
#> if (length(seed) == 1L) {
#> oseed <- get_random_seed()
#> okind <- RNGkind("L'Ecuyer-CMRG")[1]
#> on.exit(set_random_seed(oseed, kind = okind, set_kind = RNGkind),
#> add = TRUE)
#> set.seed(seed)
#> return(get_random_seed())
#> }
#> stopf("Argument 'seed' must be L'Ecuyer-CMRG RNG seed as returned by parallel::nextRNGStream() or an single integer: %s",
#> capture.output(str(seed)))
#> }
#> <bytecode: 0x58da24e84f10>
#> <environment: namespace:future>
future:::make_rng_seeds(3, seed = TRUE)
#> [[1]]
#> [1] 10407 -1689160120 222868402 -2053856020 -1630003168 -1354712463
#> [7] -1964658922
#>
#> [[2]]
#> [1] 10407 1991458783 1776575577 -1166848998 20239795 -1044128190
#> [7] 1102892864
#>
#> [[3]]
#> [1] 10407 868387449 1695576762 355151608 2069567958 862639667 124732210
system.time(future:::make_rng_seeds(1e5, seed = TRUE))
#> user system elapsed
#> 0.306 0.015 0.322
library(dqrng)
future:::parallel_rng_kind(
kind = "Xoshiro256++",
set_kind = dqrng::dqRNGkind,
next_stream = dqrng:::next_stream,
next_substream = dqrng:::next_substream,
is_seed = dqrng:::is_xoshiro256pp_seed,
as_seed = dqrng:::as_xoshiro256pp_seed
)
#> $kind
#> [1] "Xoshiro256++"
#>
#> $set_kind
#> function (kind, normal_kind = "ignored")
#> {
#> invisible(.Call(`_dqrng_dqRNGkind`, kind, normal_kind))
#> }
#> <bytecode: 0x58da27b66ba8>
#> <environment: namespace:dqrng>
#>
#> $next_stream
#> function (state)
#> {
#> .Call(`_dqrng_next_stream`, state)
#> }
#> <bytecode: 0x58da27b69fc8>
#> <environment: namespace:dqrng>
#>
#> $next_substream
#> function (state)
#> {
#> .Call(`_dqrng_next_substream`, state)
#> }
#> <bytecode: 0x58da27b69698>
#> <environment: namespace:dqrng>
#>
#> $is_seed
#> function (seed)
#> {
#> is.character(seed) && length(seed) == 5L && seed[1] == "xoshiro256++" &&
#> grep("^[0-9]+$", seed, invert = TRUE) == 1
#> }
#> <bytecode: 0x58da27b68d68>
#> <environment: namespace:dqrng>
#>
#> $as_seed
#> function (seed)
#> {
#> if (is.logical(seed)) {
#> if (length(seed) != 1L && !is.na(seed) && !seed) {
#> stop("Argument 'seed' must be TRUE if logical: %s",
#> seed)
#> }
#> oseed <- dqrng_get_state()
#> if (!is.na(seed) && seed) {
#> if (is_xoshiro256pp_seed(oseed))
#> return(oseed)
#> }
#> on.exit(dqrng_set_state(oseed), add = TRUE)
#> dqRNGkind("Xoshiro256++")
#> return(dqrng_get_state())
#> }
#> if (is_xoshiro256pp_seed(seed)) {
#> return(seed)
#> }
#> if (is.numeric(seed) && all(is.finite(seed)) && length(seed) <=
#> 2) {
#> seed <- as.integer(seed)
#> oseed <- dqrng_get_state()
#> on.exit(dqrng_set_state(oseed), add = TRUE)
#> dqRNGkind("Xoshiro256++")
#> dqset.seed(seed)
#> return(dqrng_get_state())
#> }
#> stop("Argument 'seed' must be TRUE, Xoshiro256++ RNG state as returned by dqrng_get_state() or an integer vector with length <= 2")
#> }
#> <bytecode: 0x58da27b6bc48>
#> <environment: namespace:dqrng>
future:::make_rng_seeds(3, seed = TRUE)
#> [[1]]
#> [1] "xoshiro256++" "11366403006693953604" "5776312522536079727"
#> [4] "9618926254244045008" "5165000284952699762"
#>
#> [[2]]
#> [1] "xoshiro256++" "16669368997197376375" "18351587136123902063"
#> [4] "4810568700463215920" "6616614669550573422"
#>
#> [[3]]
#> [1] "xoshiro256++" "9919774969775135560" "2550819741878577113"
#> [4] "1962113790411544427" "2910686945654831949"
system.time(future:::make_rng_seeds(1e5, seed = TRUE))
#> user system elapsed
#> 2.733 0.042 2.806Created on 2025-06-29 with reprex v2.1.1 @HenrikBengtsson Do you have a test case that one could use for something more realistic? |
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if bacd1f7 is merged into main:
|
|
This is how benchmark results would change (along with a 95% confidence interval in relative change) if 59d7eed is merged into main:
|
|
This is awesome!
I don't have anything generic yet, but I have "rng" unit tests in future, future.apply, doFuture, ... They're under library(dqrng)
void <- future:::parallel_rng_kind(
kind = "Xoshiro256++",
set_kind = dqrng::dqRNGkind,
next_stream = dqrng:::next_stream,
next_substream = dqrng:::next_substream,
is_seed = dqrng:::is_xoshiro256pp_seed,
as_seed = dqrng:::as_xoshiro256pp_seed
)
## Give an error, bc assuming Ecuyer-CMRG
future:::testme("rng")
future.apply:::testme("rng")
## Ignored and give warnings, bc assumes integer `seed`
doFuture:::testme("foreach_dofuture,rng")
## Works
future:::testme("rng_utils")
future.callr:::testme("rng")
future.mirai:::testme("rng")I'll adjust these tests so they're invariant or agile to I'll probably also end up adding something to future.tests, but that's later on. |
|
I got some strange messages from library(future)
plan(multisession)
library(future.apply)
void <- future:::parallel_rng_kind(
kind = "L'Ecuyer-CMRG",
set_kind = base::RNGkind,
next_stream = parallel::nextRNGStream,
next_substream = parallel::nextRNGSubStream,
is_seed = future:::is_lecyer_cmrg_seed,
as_seed = future:::as_lecyer_cmrg_seed
)
# identical results
future_sapply(1:8, \(x){sum(runif(1e6))}, future.seed = 42)
#> [1] 499815.2 499821.5 499642.8 499897.5 499980.7 499645.6 499786.0 500107.4
future_sapply(1:8, \(x){sum(runif(1e6))}, future.seed = 42)
#> [1] 499815.2 499821.5 499642.8 499897.5 499980.7 499645.6 499786.0 500107.4
library(dqrng)
void <- future:::parallel_rng_kind(
kind = "Xoshiro256++",
set_kind = dqrng::dqRNGkind,
next_stream = dqrng:::next_stream,
next_substream = dqrng:::next_substream,
is_seed = dqrng:::is_xoshiro256pp_seed,
as_seed = dqrng:::as_xoshiro256pp_seed
)
# *not* identical results
future_sapply(1:8, \(x){sum(dqrunif(1e6))}, future.seed = 42)
#> [1] 499899.5 499831.8 500003.0 500008.3 499772.2 499348.9 499966.8 499665.0
future_sapply(1:8, \(x){sum(dqrunif(1e6))}, future.seed = 42)
#> [1] 500143.0 500230.2 500017.7 499620.6 499994.3 499810.3 500185.4 499568.3Created on 2025-06-30 with reprex v2.1.1 |
|
And executing the base variant with the So it looks like you are assigning the generated seeds directly to |
Yes. It has slowly started to sink in what you've told me about dqRNG is doing its own thing. So, at a minimum, it sounds like BTW, for parallel processing, we also need to initiate each parallel worker using |
Yes, that sounds right. Since the other functions use
I am not sure. I am still trying to wrap my head around the
That is a very good idea. Maybe it would make sense to have a general |
|
I was curious to have a closer look at this:
Basically I have build a small package that takes some of my ideas for "how I would implement an RNG package today" and puts them into practice: https://rstub.codeberg.page/xoshiro/ At the moment it is not really fast, since there is a lot of back and forth going on with user defined RNGs. For good performance one needs special purpose functions. However, it does make use of library(xoshiro)
library(future.apply)
#> Loading required package: future
plan(multisession)
void <- future:::parallel_rng_kind(
kind = "user",
set_kind = base::RNGkind,
next_stream = xoshiro:::next_stream,
next_substream = xoshiro:::next_substream,
is_seed = xoshiro:::is_xoshiro256pp_seed,
as_seed = xoshiro:::as_xoshiro256pp_seed
)
# identical results
future_sapply(1:8, \(x){sum(runif(1e6))}, future.packages = "xoshiro", future.seed = 42)
#> [1] 500076.5 500328.9 500158.6 499847.2 500373.7 499792.5 499822.4 499186.2
future_sapply(1:8, \(x){sum(runif(1e6))}, future.packages = "xoshiro", future.seed = 42)
#> [1] 500076.5 500328.9 500158.6 499847.2 500373.7 499792.5 499822.4 499186.2Note that I think this only works because the RNG gets automatically registered as a user defined RNG upon loading the package. My |
fixes #95