Skip to content

Conversation

@rstub
Copy link
Member

@rstub rstub commented Jun 29, 2025

fixes #95

rstub added 3 commits June 29, 2025 23:27
This is for usage together with the new RNG API in future.
For now only for Xoshiro256++. For usage with the new RNG API in future.
These are (for now) meant for usage together with the new RNG API in future.
@rstub
Copy link
Member Author

rstub commented Jun 29, 2025

Initial attempt at supporting the RNG API in future. It seems to work in principle, but is a bit slow at the moment, probably because there is lots of uint64 <-> string conversion going on.

library(future)

future:::parallel_rng_kind(
              kind = "L'Ecuyer-CMRG",
          set_kind = base::RNGkind,
       next_stream = parallel::nextRNGStream,
    next_substream = parallel::nextRNGSubStream,
           is_seed = future:::is_lecyer_cmrg_seed,
           as_seed = future:::as_lecyer_cmrg_seed
)
#> $kind
#> [1] "L'Ecuyer-CMRG"
#> 
#> $set_kind
#> function (kind = NULL, normal.kind = NULL, sample.kind = NULL) 
#> {
#>     kinds <- c("Wichmann-Hill", "Marsaglia-Multicarry", "Super-Duper", 
#>         "Mersenne-Twister", "Knuth-TAOCP", "user-supplied", "Knuth-TAOCP-2002", 
#>         "L'Ecuyer-CMRG", "default")
#>     n.kinds <- c("Buggy Kinderman-Ramage", "Ahrens-Dieter", "Box-Muller", 
#>         "user-supplied", "Inversion", "Kinderman-Ramage", "default")
#>     s.kinds <- c("Rounding", "Rejection", "default")
#>     do.set <- length(kind) > 0L
#>     if (do.set) {
#>         if (!is.character(kind) || length(kind) > 1L) 
#>             stop("'kind' must be a character string (RNG to be used).")
#>         if (is.na(i.knd <- pmatch(kind, kinds) - 1L)) 
#>             stop(gettextf("'%s' is not a valid abbreviation of an RNG", 
#>                 kind), domain = NA)
#>         if (i.knd == length(kinds) - 1L) 
#>             i.knd <- -1L
#>     }
#>     else i.knd <- NULL
#>     if (!is.null(normal.kind)) {
#>         if (!is.character(normal.kind) || length(normal.kind) != 
#>             1L) 
#>             stop(gettextf("'%s' must be a character string", 
#>                 "normal.kind"), domain = NA)
#>         normal.kind <- pmatch(normal.kind, n.kinds) - 1L
#>         if (is.na(normal.kind)) 
#>             stop(gettextf("'%s' is not a valid choice", normal.kind), 
#>                 domain = NA)
#>         if (normal.kind == 0L) 
#>             warning("buggy version of Kinderman-Ramage generator used", 
#>                 domain = NA)
#>         if (normal.kind == length(n.kinds) - 1L) 
#>             normal.kind <- -1L
#>     }
#>     if (!is.null(sample.kind)) {
#>         if (!is.character(sample.kind) || length(sample.kind) != 
#>             1L) 
#>             stop(gettextf("'%s' must be a character string", 
#>                 "sample.kind"), domain = NA)
#>         sample.kind <- pmatch(sample.kind, s.kinds) - 1L
#>         if (is.na(sample.kind)) 
#>             stop(gettextf("'%s' is not a valid choice", sample.kind), 
#>                 domain = NA)
#>         if (sample.kind == 0L) 
#>             warning("non-uniform 'Rounding' sampler used", domain = NA)
#>         if (sample.kind == length(s.kinds) - 1L) 
#>             sample.kind <- -1L
#>     }
#>     r <- 1L + .Internal(RNGkind(i.knd, normal.kind, sample.kind))
#>     r <- c(kinds[r[1L]], n.kinds[r[2L]], s.kinds[r[3L]])
#>     if (do.set || !is.null(normal.kind) || !is.null(sample.kind)) 
#>         invisible(r)
#>     else r
#> }
#> <bytecode: 0x58da23eee168>
#> <environment: namespace:base>
#> 
#> $next_stream
#> function (seed) 
#> {
#>     if (!is.integer(seed) || seed[1L]%%100L != 7L) 
#>         stop(gettextf("invalid value of %s", "'seed'"), domain = NA)
#>     .Call(C_nextStream, seed)
#> }
#> <bytecode: 0x58da24e7e500>
#> <environment: namespace:parallel>
#> 
#> $next_substream
#> function (seed) 
#> {
#>     if (!is.integer(seed) || seed[1L]%%100L != 7L) 
#>         stop(gettextf("invalid value of %s", "'seed'"), domain = NA)
#>     .Call(C_nextSubStream, seed)
#> }
#> <bytecode: 0x58da24e83140>
#> <environment: namespace:parallel>
#> 
#> $is_seed
#> function (seed) 
#> {
#>     is.numeric(seed) && length(seed) == 7L && all(is.finite(seed)) && 
#>         (seed[1]%%10000L == 407L)
#> }
#> <bytecode: 0x58da24e821f0>
#> <environment: namespace:future>
#> 
#> $as_seed
#> function (seed) 
#> {
#>     if (is.logical(seed)) {
#>         stop_if_not(length(seed) == 1L)
#>         if (!is.na(seed) && !seed) {
#>             stopf("Argument 'seed' must be TRUE if logical: %s", 
#>                 seed)
#>         }
#>         oseed <- get_random_seed()
#>         if (!is.na(seed) && seed) {
#>             if (is_lecyer_cmrg_seed(oseed)) 
#>                 return(oseed)
#>         }
#>         okind <- RNGkind("L'Ecuyer-CMRG")[1]
#>         on.exit(set_random_seed(oseed, kind = okind, set_kind = RNGkind), 
#>             add = TRUE)
#>         return(get_random_seed())
#>     }
#>     stop_if_not(is.numeric(seed), all(is.finite(seed)))
#>     seed <- as.integer(seed)
#>     if (is_lecyer_cmrg_seed(seed)) {
#>         return(seed)
#>     }
#>     if (length(seed) == 1L) {
#>         oseed <- get_random_seed()
#>         okind <- RNGkind("L'Ecuyer-CMRG")[1]
#>         on.exit(set_random_seed(oseed, kind = okind, set_kind = RNGkind), 
#>             add = TRUE)
#>         set.seed(seed)
#>         return(get_random_seed())
#>     }
#>     stopf("Argument 'seed' must be L'Ecuyer-CMRG RNG seed as returned by parallel::nextRNGStream() or an single integer: %s", 
#>         capture.output(str(seed)))
#> }
#> <bytecode: 0x58da24e84f10>
#> <environment: namespace:future>
future:::make_rng_seeds(3, seed = TRUE)
#> [[1]]
#> [1]       10407 -1689160120   222868402 -2053856020 -1630003168 -1354712463
#> [7] -1964658922
#> 
#> [[2]]
#> [1]       10407  1991458783  1776575577 -1166848998    20239795 -1044128190
#> [7]  1102892864
#> 
#> [[3]]
#> [1]      10407  868387449 1695576762  355151608 2069567958  862639667  124732210
system.time(future:::make_rng_seeds(1e5, seed = TRUE))
#>    user  system elapsed 
#>   0.306   0.015   0.322

library(dqrng)
future:::parallel_rng_kind(
              kind = "Xoshiro256++",
          set_kind = dqrng::dqRNGkind,
       next_stream = dqrng:::next_stream,
    next_substream = dqrng:::next_substream,
           is_seed = dqrng:::is_xoshiro256pp_seed,
           as_seed = dqrng:::as_xoshiro256pp_seed
)
#> $kind
#> [1] "Xoshiro256++"
#> 
#> $set_kind
#> function (kind, normal_kind = "ignored") 
#> {
#>     invisible(.Call(`_dqrng_dqRNGkind`, kind, normal_kind))
#> }
#> <bytecode: 0x58da27b66ba8>
#> <environment: namespace:dqrng>
#> 
#> $next_stream
#> function (state) 
#> {
#>     .Call(`_dqrng_next_stream`, state)
#> }
#> <bytecode: 0x58da27b69fc8>
#> <environment: namespace:dqrng>
#> 
#> $next_substream
#> function (state) 
#> {
#>     .Call(`_dqrng_next_substream`, state)
#> }
#> <bytecode: 0x58da27b69698>
#> <environment: namespace:dqrng>
#> 
#> $is_seed
#> function (seed) 
#> {
#>     is.character(seed) && length(seed) == 5L && seed[1] == "xoshiro256++" && 
#>         grep("^[0-9]+$", seed, invert = TRUE) == 1
#> }
#> <bytecode: 0x58da27b68d68>
#> <environment: namespace:dqrng>
#> 
#> $as_seed
#> function (seed) 
#> {
#>     if (is.logical(seed)) {
#>         if (length(seed) != 1L && !is.na(seed) && !seed) {
#>             stop("Argument 'seed' must be TRUE if logical: %s", 
#>                 seed)
#>         }
#>         oseed <- dqrng_get_state()
#>         if (!is.na(seed) && seed) {
#>             if (is_xoshiro256pp_seed(oseed)) 
#>                 return(oseed)
#>         }
#>         on.exit(dqrng_set_state(oseed), add = TRUE)
#>         dqRNGkind("Xoshiro256++")
#>         return(dqrng_get_state())
#>     }
#>     if (is_xoshiro256pp_seed(seed)) {
#>         return(seed)
#>     }
#>     if (is.numeric(seed) && all(is.finite(seed)) && length(seed) <= 
#>         2) {
#>         seed <- as.integer(seed)
#>         oseed <- dqrng_get_state()
#>         on.exit(dqrng_set_state(oseed), add = TRUE)
#>         dqRNGkind("Xoshiro256++")
#>         dqset.seed(seed)
#>         return(dqrng_get_state())
#>     }
#>     stop("Argument 'seed' must be TRUE, Xoshiro256++ RNG state as returned by dqrng_get_state() or an integer vector with length <= 2")
#> }
#> <bytecode: 0x58da27b6bc48>
#> <environment: namespace:dqrng>
future:::make_rng_seeds(3, seed = TRUE)
#> [[1]]
#> [1] "xoshiro256++"         "11366403006693953604" "5776312522536079727" 
#> [4] "9618926254244045008"  "5165000284952699762" 
#> 
#> [[2]]
#> [1] "xoshiro256++"         "16669368997197376375" "18351587136123902063"
#> [4] "4810568700463215920"  "6616614669550573422" 
#> 
#> [[3]]
#> [1] "xoshiro256++"        "9919774969775135560" "2550819741878577113"
#> [4] "1962113790411544427" "2910686945654831949"
system.time(future:::make_rng_seeds(1e5, seed = TRUE))
#>    user  system elapsed 
#>   2.733   0.042   2.806

Created on 2025-06-29 with reprex v2.1.1

@HenrikBengtsson Do you have a test case that one could use for something more realistic?

@github-actions
Copy link

This is how benchmark results would change (along with a 95% confidence interval in relative change) if bacd1f7 is merged into main:

  • ✔️rexp_test: 87.3ms -> 86.3ms [-5.27%, +2.88%]
  • ✔️rnorm_test: 109ms -> 109ms [-2.34%, +2.83%]
  • ✔️runif_test: 65.1ms -> 64.3ms [-3.88%, +1.48%]
    Further explanation regarding interpretation and methodology can be found in the documentation.

@github-actions
Copy link

This is how benchmark results would change (along with a 95% confidence interval in relative change) if 59d7eed is merged into main:

  • ✔️rexp_test: 84ms -> 81.7ms [-6.6%, +1.04%]
  • ✔️rnorm_test: 108ms -> 107ms [-3.33%, +1.33%]
  • ✔️runif_test: 62.1ms -> 64.7ms [-0.11%, +8.7%]
    Further explanation regarding interpretation and methodology can be found in the documentation.

@HenrikBengtsson
Copy link

HenrikBengtsson commented Jun 30, 2025

This is awesome!

@HenrikBengtsson Do you have a test case that one could use for something more realistic?

I don't have anything generic yet, but I have "rng" unit tests in future, future.apply, doFuture, ... They're under tests/test-<name>.R, which each calls <pkg>:::testme("<name>"), but are implemented under inst/testme/test-<name>.R.

library(dqrng)
void <- future:::parallel_rng_kind(
            kind = "Xoshiro256++",
        set_kind = dqrng::dqRNGkind,
     next_stream = dqrng:::next_stream,
  next_substream = dqrng:::next_substream,
         is_seed = dqrng:::is_xoshiro256pp_seed,
         as_seed = dqrng:::as_xoshiro256pp_seed
)


## Give an error, bc assuming Ecuyer-CMRG
future:::testme("rng")
future.apply:::testme("rng")

## Ignored and give warnings, bc assumes integer `seed`
doFuture:::testme("foreach_dofuture,rng")

## Works
future:::testme("rng_utils")
future.callr:::testme("rng")
future.mirai:::testme("rng")

I'll adjust these tests so they're invariant or agile to future:::parallel_rng_kind().

I'll probably also end up adding something to future.tests, but that's later on.

@rstub
Copy link
Member Author

rstub commented Jun 30, 2025

I got some strange messages from future:::testme("rng_utils") that I found hard to interpret, so I made a minimal example where I just sum a bunch of random numbers. Notably, using the default set-up is indeed reproducible. Not so for dqrng. However, I am wondering: How are you actually setting the seed for the individual futures? None of the functions defined via future:::parallel_rng_kind can be used for that.

library(future)
plan(multisession)
library(future.apply)

void <- future:::parallel_rng_kind(
              kind = "L'Ecuyer-CMRG",
          set_kind = base::RNGkind,
       next_stream = parallel::nextRNGStream,
    next_substream = parallel::nextRNGSubStream,
           is_seed = future:::is_lecyer_cmrg_seed,
           as_seed = future:::as_lecyer_cmrg_seed
)

# identical results
future_sapply(1:8, \(x){sum(runif(1e6))}, future.seed = 42)
#> [1] 499815.2 499821.5 499642.8 499897.5 499980.7 499645.6 499786.0 500107.4
future_sapply(1:8, \(x){sum(runif(1e6))}, future.seed = 42)
#> [1] 499815.2 499821.5 499642.8 499897.5 499980.7 499645.6 499786.0 500107.4

library(dqrng)
void <- future:::parallel_rng_kind(
              kind = "Xoshiro256++",
          set_kind = dqrng::dqRNGkind,
       next_stream = dqrng:::next_stream,
    next_substream = dqrng:::next_substream,
           is_seed = dqrng:::is_xoshiro256pp_seed,
           as_seed = dqrng:::as_xoshiro256pp_seed
)

# *not* identical results
future_sapply(1:8, \(x){sum(dqrunif(1e6))}, future.seed = 42)
#> [1] 499899.5 499831.8 500003.0 500008.3 499772.2 499348.9 499966.8 499665.0
future_sapply(1:8, \(x){sum(dqrunif(1e6))}, future.seed = 42)
#> [1] 500143.0 500230.2 500017.7 499620.6 499994.3 499810.3 500185.4 499568.3

Created on 2025-06-30 with reprex v2.1.1

@rstub
Copy link
Member Author

rstub commented Jun 30, 2025

And executing the base variant with the dqrng config gives warnings like

1: In runif(1e+06) :
  '.Random.seed' is not an integer vector but of type 'character', so ignored

So it looks like you are assigning the generated seeds directly to .Random.seed, right?

@HenrikBengtsson
Copy link

So it looks like you are assigning the generated seeds directly to .Random.seed, right?

Yes. It has slowly started to sink in what you've told me about dqRNG is doing its own thing.

So, at a minimum, it sounds like future:::parallel_rng_kind() also need a set_state(state) function. Does that sound about right? I also think I need a get_state() to be able to detect when the RNG state has been forwarded by a future without declaring (seed = TRUE) it will do so.

BTW, for parallel processing, we also need to initiate each parallel worker using dqrng::register_methods() so that any code and packages using standard RNG functions in R will be served dqrng random numbers - is that correct?

@rstub
Copy link
Member Author

rstub commented Jul 1, 2025

So, at a minimum, it sounds like future:::parallel_rng_kind() also need a set_state(state) function. Does that sound about right?

Yes, that sounds right. Since the other functions use seed instead of state I would stick to the former.

I also think I need a get_state() to be able to detect when the RNG state has been forwarded by a future without declaring (seed = TRUE) it will do so.

I am not sure. I am still trying to wrap my head around the

BTW, for parallel processing, we also need to initiate each parallel worker using dqrng::register_methods() so that any code and packages using standard RNG functions in R will be served dqrng random numbers - is that correct?

That is a very good idea. Maybe it would make sense to have a general init(seed) function which sets the seed and does other things like registering the RNG.

@rstub
Copy link
Member Author

rstub commented Jul 6, 2025

I was curious to have a closer look at this:

So it looks like you are assigning the generated seeds directly to .Random.seed, right?

Yes. It has slowly started to sink in what you've told me about dqRNG is doing its own thing.

Basically I have build a small package that takes some of my ideas for "how I would implement an RNG package today" and puts them into practice: https://rstub.codeberg.page/xoshiro/

At the moment it is not really fast, since there is a lot of back and forth going on with user defined RNGs. For good performance one needs special purpose functions. However, it does make use of set.seed() and .Random.seed like the build in RNGs and so the current functionality in future does work here:

library(xoshiro)
library(future.apply)
#> Loading required package: future
plan(multisession)

void <- future:::parallel_rng_kind(
              kind = "user",
          set_kind = base::RNGkind,
       next_stream = xoshiro:::next_stream,
    next_substream = xoshiro:::next_substream,
           is_seed = xoshiro:::is_xoshiro256pp_seed,
           as_seed = xoshiro:::as_xoshiro256pp_seed
)

# identical results
future_sapply(1:8, \(x){sum(runif(1e6))}, future.packages = "xoshiro", future.seed = 42)
#> [1] 500076.5 500328.9 500158.6 499847.2 500373.7 499792.5 499822.4 499186.2
future_sapply(1:8, \(x){sum(runif(1e6))}, future.packages = "xoshiro", future.seed = 42)
#> [1] 500076.5 500328.9 500158.6 499847.2 500373.7 499792.5 499822.4 499186.2

Note that I think this only works because the RNG gets automatically registered as a user defined RNG upon loading the package.

My xoshiro:::as_xoshiro256pp_seed is almost identical to your future:::as_lecyer_cmrg_seed and currently uses several internal functions from future. Not sure if there is a way to do this w/o all that code duplication.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Prepare for custom RNGs in future package

3 participants