suppressPackageStartupMessages({
library(terra)
library(ggplot2)
library(spatialsample)
library(rsample)
library(sf)
library(dplyr)
})
f <- system.file("ex/elev.tif", package = "terra")
r <- rast(f)
plot(r)
elev_df <- as.data.frame(r, xy = TRUE)
head(elev_df)
#> x y elevation
#> 127 6.004167 50.17917 529
#> 128 6.012500 50.17917 542
#> 129 6.020833 50.17917 547
#> 130 6.029167 50.17917 535
#> 218 5.970833 50.17083 485
#> 219 5.979167 50.17083 497
nrow(elev_df)
#> [1] 4608
elev_sf <- st_as_sf(elev_df, coords = c("x", "y")) |>
sf::st_set_crs(terra::crs(r))
spcv <- spatial_clustering_cv(
elev_sf,
v = 50 # how many splits?
)
# look at the spatial clusers
autoplot(spcv) +
theme_light() +
theme(legend.position = "none")
bcv <- mutate(spcv, splits = purrr::map(splits, assessment))
# Function to perform spatial block bootstrapping
spatial_block_bootstrap <- function(blocks, times) {
replicate(times,
{
sampled_blocks <- sample(blocks, replace = TRUE)
do.call(rbind, sampled_blocks)
},
simplify = FALSE
)
}
# Extract the blocks from the cross-validation object
blocks <- bcv$splits
# Perform spatial block bootstrapping
bdf_sp <- spatial_block_bootstrap(blocks, times = 100) |>
bind_rows(.id = "replicate") |>
mutate(
bs_type = "spatial_block_bootstrap",
id = replicate
)
# Function to perform ordinary bootstrapping
bdf_ordinary <- bootstraps(elev_sf, times = 100) |>
mutate(splits = purrr::map(splits, analysis)) |>
tidyr::unnest(cols = splits) |>
mutate(bs_type = "ordinary_bootstrap")
# Combine the two bootstrapped datasets and plot the results
bind_rows(bdf_sp, bdf_ordinary) |>
ggplot() +
aes(x = elevation, group = id) +
stat_density(
geom = "line", color = "#0f9e97", alpha = 0.1, position = "identity"
) +
theme_light() +
facet_wrap(~bs_type)
# Function to calculate the bootstrap estimate and confidence interval
bs_stat_ci <- function(x, f = sum) {
bs_sum <- sf::st_drop_geometry(x) |>
group_by(id) |>
summarise(stat = f(elevation))
tibble(
stat = mean(bs_sum$stat),
qi_low = quantile(bs_sum$stat, 0.025),
qi_high = quantile(bs_sum$stat, 0.975),
p_error = (qi_high - qi_low) / stat
)
}
bs_stat_ci(bdf_sp)
#> # A tibble: 1 × 4
#> stat qi_low qi_high p_error
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1601492. 1441811. 1818061. 0.235
bs_stat_ci(bdf_ordinary)
#> # A tibble: 1 × 4
#> stat qi_low qi_high p_error
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1604672. 1595111. 1614591. 0.0121
Created on 2025-01-07 with reprex v2.1.1
Thought... Using kmeans for the blocks does not ensure equally sized replacement. Maybe this is okay for some stats like mean but maybe this could be a problem for sum. Can we create clusters of equally size?