Problem statement

Given a set \(S = \{s_1, s_2, \dots, s_n\}\), one would like to sample a subset \(X \subset S\) of size \(m\). If this operation needs to be repeated a very large number of times \(k\), what is the most efficient way to do it?

# Population to sample from: the integers 1 to 100.
set_S <- seq_len(100)
# Baseline: time a single draw of 50 elements from set_S (sample() draws
# without replacement by default), repeating the measurement 10 times
# (the `neval` column in the recorded output below).
microbenchmark::microbenchmark(sample(set_S, size = 50), times = 10)
## Unit: microseconds
##                      expr  min    lq   mean median    uq    max neval
##  sample(set_S, size = 50) 5.34 5.388 8.9986  5.693 6.157 35.265    10

Alternatives

# Parameters shared by the alternatives below.
K <- 100  # number of repeated draws
m <- 50   # size of each sampled subset
# Option A: draw K samples of size m from set_S, without replacement, through
# the public base::sample() interface.
#
# Reads the globals `K` (number of repetitions), `m` (sample size) and
# `set_S` (the population). Returns a matrix with m rows and K columns, one
# sample per column (a plain vector of length K when m is 1).
option_a <- function() {
  # seq_len() keeps the K == 0 case empty (1:0 would iterate over c(1, 0)),
  # and vapply() pins the type and length of every draw instead of relying on
  # sapply()'s input-dependent simplification.
  vapply(
    seq_len(K),
    function(i) sample(set_S, size = m),
    FUN.VALUE = vector(typeof(set_S), m)
  )
}

# Option B: draw K samples of size m from set_S, without replacement, by
# calling R's internal sampling routine directly and skipping the R-level
# wrappers of base::sample().
#
# Reads the globals `K` (number of repetitions), `m` (sample size) and
# `set_S` (the population). Returns a matrix with m rows and K columns, one
# sample per column (a plain vector of length K when m is 1).
#
# NOTE(review): .Internal() is not part of R's public API and may change
# between R versions; sample.int(length(set_S), m) is the supported way to
# obtain the same indices.
option_b <- function() {
  n <- length(set_S)
  vapply(
    seq_len(K),
    function(i) {
      # .Internal() takes exactly one argument, the call itself. The internal
      # `sample` receives the population size and returns indices in 1..n,
      # so the indices are mapped back onto the elements of set_S.
      set_S[.Internal(sample(n, m, FALSE, NULL))]
    },
    FUN.VALUE = vector(typeof(set_S), m)
  )
}
# Compare the two alternatives. The functions must be *called* inside the
# benchmark: passing the bare names `option_a` and `option_b` only times the
# lookup of the function objects, not the K rounds of sampling.
microbenchmark::microbenchmark(option_a(), option_b())
## NOTE: the output below was recorded from the earlier call
## `microbenchmark(option_a, option_b)`, which timed only the symbol lookups
## (hence the nanosecond figures). Re-run the line above for real timings.
## Unit: nanoseconds
##      expr min lq  mean median uq  max neval
##  option_a  33 33 40.48     34 34  537   100
##  option_b  33 33 92.69     33 45 5525   100