Created
April 16, 2026 23:49
-
-
Save jonocarroll/9468ef7473a02c2d5ea2e2c119703559 to your computer and use it in GitHub Desktop.
Find the seeds which produce a random sample of 'HELLO' and 'WORLD'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Monkeys, Shakespeare, typewriters, &c. ----
# Based on Andrew Heiss' https://gist.github.com/andrewheiss/461c749fce2783551f31611d8f0d7548
#
# Brute-force search over set.seed() values for the seeds whose first draw of
# sample(26, 5, replace = TRUE), mapped onto LETTERS, spells "HELLO" or
# "WORLD". The search is split into chunks that are evaluated in parallel on
# {mirai} daemons via purrr::in_parallel().

library(tidyverse)

# Use 9 CPUs
mirai::daemons(9)

# Loop through just enough seeds to find both 'HELLO' and 'WORLD';
# reduced to the appropriate value after finding them
number_of_seeds <- 1.2e7

# Split this up into chunks of 100,000 seeds and loop through all those chunks
# in parallel with the magic of in_parallel() and {mirai}
chunk_size <- 1e5

# Search ----

tictoc::tic()
possible_seeds <- data.frame(seed = seq_len(number_of_seeds)) |>
  # Seeds start at 1, so shift by one before the integer division; otherwise
  # chunk 0 is one seed short and the very last seed lands in a chunk of its
  # own.
  mutate(chunk = (seed - 1L) %/% chunk_size) |>
  nest(.by = chunk) |>
  mutate(
    hw = map(
      data,
      in_parallel(\(chunk) {
        # Surprise! mirai workers don't use Mersenne Twister for their PRNG
        # algorithm. They use L’Ecuyer-CMRG, which is specially designed for
        # parallel processing (it seems to space out the X_n locations in the
        # series for good parallelization?):
        #
        # https://tidyverse.org/blog/2025/09/mirai-2-5-0/#reproducible-parallel-rng
        #
        # So without setting the RNG kind to Mersenne Twister, this loop will
        # find seeds that match HELLO and WORLD, but in the L’Ecuyer-CMRG
        # world, not the Mersenne Twister world.
        # In this case, I'm not worried about statistically valid parallel
        # PRNG series. I want to loop through the Mersenne Twister world to
        # find seeds I can use in regular old set.seed() for regular old
        # random R functions
        RNGkind("Mersenne-Twister", "Inversion", "Rejection")

        # Defined inside the function body because the function is shipped to
        # the daemons and must not rely on objects in the calling session.
        targets <- c("HELLO", "WORLD")

        # One logical per seed in this chunk: TRUE when the seed spells one of
        # the target words.
        purrr::map_lgl(chunk$seed, \(s) {
          set.seed(s)
          word <- paste0(LETTERS[sample(26, 5, replace = TRUE)], collapse = "")
          word %in% targets
        })
      })
    )
  ) |>
  unnest(c(data, hw))
tictoc::toc()
#> 16.582 sec elapsed

# The search is done, so release the worker processes started above.
mirai::daemons(0)

# Results ----

hw_seeds <- possible_seeds |>
  filter(hw)
hw_seeds
#> # A tibble: 2 × 3
#>   chunk     seed hw
#>   <dbl>    <int> <lgl>
#> 1    25  2505587 TRUE
#> 2   111 11135560 TRUE

# Verification ----

# Regenerate the word for each hit in the main session. The RNG kinds are
# pinned to the same ones used in the workers so the check does not depend on
# whatever RNGkind() the current session happens to have set.
map_chr(hw_seeds$seed, \(s) {
  withr::with_seed(
    s,
    paste0(LETTERS[sample(26, 5, replace = TRUE)], collapse = ""),
    .rng_kind = "Mersenne-Twister",
    .rng_normal_kind = "Inversion",
    .rng_sample_kind = "Rejection"
  )
})
#> [1] "HELLO" "WORLD"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment