h-a-graham · January 8, 2025 15:29 · h-a-graham · Jan 7, 2025 · h-a-graham · Jan 7, 2025
diff --git a/sp_block_bootstrap.md b/sp_block_bootstrap.md
diff --git a/sp_block_bootstrap_mlr.md b/sp_block_bootstrap_mlr.md
diff --git a/spatial_cluster_bootstrap.R b/spatial_cluster_bootstrap.R
 #' Internal helper: spatial block bootstrap of a summary statistic.
 #'
 #' Draws whole folds with replacement, rebuilds a data set from the rows of
 #' the drawn folds and applies `f` to the target column of every replicate.
 #' @param dt data.frame or data.table holding a `fold` column and the column
 #'   named by `tc`
 #' @param tc name of the column whose values are handed to `f`
 #' @param f function reducing a vector to a single numeric value
 #' @param n_b number of bootstrap samples
 #' @param idc currently unused; kept so existing callers keep working
 #' @param progress forwarded to the `.progress` argument of `purrr::map_dbl()`
 #' @return numeric vector of length `n_b` holding the value of `f` for each
 #'   bootstrap sample
 #' @import data.table
 #' @keywords internal
 #' @noRd
 spatial_block_bootstrap <- function(dt, tc, f, n_b, idc, progress) {
   dt <- data.table::as.data.table(dt)
   stopifnot("`dt` must contain a `fold` column" = "fold" %in% names(dt))

   unique_folds <- unique(dt[["fold"]])
   n_unique <- length(unique_folds)

   # The map call is the last expression so the result is returned visibly.
   purrr::map_dbl(
     seq_len(n_b),
     function(i) {
       # Draw folds by position: sample(x) treats a length-one numeric `x` as
       # 1:x, which would invent fold ids when only one fold is present. For
       # more than one fold this consumes the RNG exactly like sample() does.
       drawn <- unique_folds[sample.int(n_unique, replace = TRUE)]

       # One copy of a fold's rows per time the fold was drawn.
       bootstrap_sample <- dt[
         data.table::data.table(fold = drawn),
         on = "fold",
         allow.cartesian = TRUE
       ]

       f(bootstrap_sample[[tc]])
     },
     .progress = progress
   )
 }

 #' Turn a SpatRaster into an sf point layer
 #' @param r SpatRaster
 #' @return sf object built from `as.data.frame(r, xy = TRUE)`: point geometry
 #'   from the `x`/`y` columns, the CRS of `r`, and a sequential `row_id` column
 #' @keywords internal
 #' @noRd
 rast_to_sf <- function(r) {
   checkmate::assert_class(r, "SpatRaster")

   # Raster values together with their x/y coordinates.
   cell_df <- as.data.frame(r, xy = TRUE)

   # Promote the coordinate columns to geometry, then attach the raster CRS.
   pts <- sf::st_as_sf(cell_df, coords = c("x", "y"))
   pts <- sf::st_set_crs(pts, terra::crs(r))

   # Sequential row identifier.
   pts$row_id <- seq_len(nrow(pts))
   return(pts)
 }


 #' Generate spatial bootstrap statistics for a SpatRaster object.
 #' @param x a SpatRaster object
 #' @param target_col name of the column (raster layer) to summarise
 #' @param f function applied to the target values of each bootstrap sample;
 #'   must return a single numeric value
 #' @param n_folds number of folds (at least 3)
 #' @param n_bootstrap number of bootstrap samples
 #' @param alpha significance level in `[0, 1]`; the interval runs from the
 #'   `alpha / 2` to the `1 - alpha / 2` quantile of the bootstrap results
 #' @param idcol column name for bootstrap id; forwarded to the internal
 #'   bootstrap helper
 #' @param progress logical should progress be shown
 #' @param sample_fold_n whole number forwarded to `autoplot()` as
 #'   `sample_fold_n` for the fold plot; reset to `NULL` when the average
 #'   number of points per fold is smaller than this value
 #' @return a `spatboot` object: a list with `boot_results` (numeric vector of
 #'   bootstrap statistics), `boot_stat` (their mean), `boot_stat_ci_low` and
 #'   `boot_stat_ci_high` (quantile interval bounds), `boot_stat_ci_perc`
 #'   (interval width as a percentage of `boot_stat`) and `fold_plot` (ggplot
 #'   of the folds)
 #' @import mlr3spatiotempcv mlr3 data.table
 #' @export
 #' @details
 #' This function calculates bootstrap statistics for a single band of a
 #' SpatRaster object. It does this using spatially clustered bootstrap
 #' samples. The function first converts the SpatRaster to an sf object, then
 #' uses the mlr3spatiotempcv package to generate spatially clustered folds.
 #' The function then samples these folds with replacement to generate
 #' bootstrap samples. The function returns a `spatboot` object which contains
 #' the bootstrap samples, the bootstrap statistic, the bootstrap confidence
 #' interval, the percentage confidence interval, and a ggplot object of the
 #' folds.
 spatial_cluster_bootstrap <- function(
     x, target_col, f, n_folds = 100, n_bootstrap = 100, alpha = 0.05,
     idcol = "boot_id", progress = TRUE, sample_fold_n = 1e3L) {
   # Scalar assertions: `max.len = 1` alone would also accept zero-length
   # input, and `assert_integer()` would reject an integer-valued double.
   checkmate::assert_class(x, "SpatRaster")
   checkmate::assert_string(target_col)
   checkmate::assert_function(f)
   checkmate::assert_int(n_folds, lower = 3)
   checkmate::assert_int(n_bootstrap, lower = 1)
   checkmate::assert_number(alpha, lower = 0, upper = 1)
   checkmate::assert_string(idcol)
   checkmate::assert_flag(progress)
   checkmate::assert_int(sample_fold_n, lower = 1)
   sample_fold_n <- as.integer(sample_fold_n)

   x <- rast_to_sf(x)
   # Fail early with a clear message instead of handing `NULL` to `f` later.
   checkmate::assert_choice(target_col, names(x))

   # `row_id` only serves as a placeholder target; the task is needed solely
   # to build the spatial partitioning.
   x_tsk <- mlr3spatiotempcv::as_task_regr_st(
     x,
     target = "row_id"
   )
   # Instantiate Resampling
   rcv <- mlr3::rsmp("spcv_coords", folds = n_folds)
   rcv$instantiate(x_tsk)

   # Drop the plotting sub-sample size when the average fold is smaller.
   if (nrow(x) / n_folds < sample_fold_n) {
     sample_fold_n <- NULL
   }

   # ggplot2 is not part of the roxygen imports, so qualify its functions.
   fold_plot <- suppressMessages(
     ggplot2::autoplot(rcv, x_tsk, sample_fold_n = sample_fold_n) +
       ggplot2::scale_colour_viridis_d() +
       ggplot2::theme_light() +
       ggplot2::theme(legend.position = "none")
   )

   # Attach the fold assignment of the resampling instance to every point.
   x_sf_folds <- merge(
     data.table::as.data.table(x), rcv$instance,
     by = "row_id"
   )
   boot_vec <- spatial_block_bootstrap(
     x_sf_folds, target_col, f, n_bootstrap, idcol, progress
   )

   boot_stat <- mean(boot_vec)

   # Percentile interval of the bootstrap distribution.
   ci <- stats::quantile(
     boot_vec,
     probs = c(alpha / 2, 1 - alpha / 2),
     names = FALSE
   )
   boot_stat_ci_low <- ci[[1]]
   boot_stat_ci_high <- ci[[2]]

   sp_boot_obj <- list(
     boot_results = boot_vec,
     boot_stat = boot_stat,
     boot_stat_ci_low = boot_stat_ci_low,
     boot_stat_ci_high = boot_stat_ci_high,
     boot_stat_ci_perc =
       ((boot_stat_ci_high - boot_stat_ci_low) / boot_stat) * 100,
     fold_plot = fold_plot
   )
   class(sp_boot_obj) <- "spatboot"
   sp_boot_obj
 }
	#' Internal helper: spatial block bootstrap of a summary statistic.
	#'
	#' Draws whole folds with replacement, rebuilds a data set from the rows of
	#' the drawn folds and applies `f` to the target column of every replicate.
	#' @param dt data.frame or data.table holding a `fold` column and the column
	#'   named by `tc`
	#' @param tc name of the column whose values are handed to `f`
	#' @param f function reducing a vector to a single numeric value
	#' @param n_b number of bootstrap samples
	#' @param idc currently unused; kept so existing callers keep working
	#' @param progress forwarded to the `.progress` argument of `purrr::map_dbl()`
	#' @return numeric vector of length `n_b` holding the value of `f` for each
	#'   bootstrap sample
	#' @import data.table
	#' @keywords internal
	#' @noRd
	spatial_block_bootstrap <- function(dt, tc, f, n_b, idc, progress) {
	  dt <- data.table::as.data.table(dt)
	  stopifnot("`dt` must contain a `fold` column" = "fold" %in% names(dt))

	  unique_folds <- unique(dt[["fold"]])
	  n_unique <- length(unique_folds)

	  # The map call is the last expression so the result is returned visibly.
	  purrr::map_dbl(
	    seq_len(n_b),
	    function(i) {
	      # Draw folds by position: sample(x) treats a length-one numeric `x` as
	      # 1:x, which would invent fold ids when only one fold is present. For
	      # more than one fold this consumes the RNG exactly like sample() does.
	      drawn <- unique_folds[sample.int(n_unique, replace = TRUE)]

	      # One copy of a fold's rows per time the fold was drawn.
	      bootstrap_sample <- dt[
	        data.table::data.table(fold = drawn),
	        on = "fold",
	        allow.cartesian = TRUE
	      ]

	      f(bootstrap_sample[[tc]])
	    },
	    .progress = progress
	  )
	}

	#' Convert a SpatRaster to an sf point layer
	#' @param r SpatRaster
	#' @return sf object built from `as.data.frame(r, xy = TRUE)`: point geometry
	#'   from the `x`/`y` columns, the CRS of `r`, and a sequential `row_id` column
	#' @keywords internal
	#' @noRd
	rast_to_sf <- function(r) {
	  checkmate::assert_class(r, "SpatRaster")

	  # Native pipe restored: the escaped form `\|>` is not valid R.
	  x <- as.data.frame(r, xy = TRUE) |>
	    sf::st_as_sf(coords = c("x", "y")) |>
	    sf::st_set_crs(terra::crs(r))

	  # Sequential row identifier.
	  x$row_id <- seq_len(nrow(x))
	  x
	}


	#' Generate spatial bootstrap statistics for a SpatRaster object.
	#' @param x a SpatRaster object
	#' @param target_col name of the column (raster layer) to summarise
	#' @param f function applied to the target values of each bootstrap sample;
	#'   must return a single numeric value
	#' @param n_folds number of folds (at least 3)
	#' @param n_bootstrap number of bootstrap samples
	#' @param alpha significance level in `[0, 1]`; the interval runs from the
	#'   `alpha / 2` to the `1 - alpha / 2` quantile of the bootstrap results
	#' @param idcol column name for bootstrap id; forwarded to the internal
	#'   bootstrap helper
	#' @param progress logical should progress be shown
	#' @param sample_fold_n whole number forwarded to `autoplot()` as
	#'   `sample_fold_n` for the fold plot; reset to `NULL` when the average
	#'   number of points per fold is smaller than this value
	#' @return a `spatboot` object: a list with `boot_results` (numeric vector of
	#'   bootstrap statistics), `boot_stat` (their mean), `boot_stat_ci_low` and
	#'   `boot_stat_ci_high` (quantile interval bounds), `boot_stat_ci_perc`
	#'   (interval width as a percentage of `boot_stat`) and `fold_plot` (ggplot
	#'   of the folds)
	#' @import mlr3spatiotempcv mlr3 data.table
	#' @export
	#' @details
	#' This function calculates bootstrap statistics for a single band of a
	#' SpatRaster object. It does this using spatially clustered bootstrap
	#' samples. The function first converts the SpatRaster to an sf object, then
	#' uses the mlr3spatiotempcv package to generate spatially clustered folds.
	#' The function then samples these folds with replacement to generate
	#' bootstrap samples. The function returns a `spatboot` object which contains
	#' the bootstrap samples, the bootstrap statistic, the bootstrap confidence
	#' interval, the percentage confidence interval, and a ggplot object of the
	#' folds.
	spatial_cluster_bootstrap <- function(
	    x, target_col, f, n_folds = 100, n_bootstrap = 100, alpha = 0.05,
	    idcol = "boot_id", progress = TRUE, sample_fold_n = 1e3L) {
	  # Scalar assertions: `max.len = 1` alone would also accept zero-length
	  # input, and `assert_integer()` would reject an integer-valued double.
	  checkmate::assert_class(x, "SpatRaster")
	  checkmate::assert_string(target_col)
	  checkmate::assert_function(f)
	  checkmate::assert_int(n_folds, lower = 3)
	  checkmate::assert_int(n_bootstrap, lower = 1)
	  checkmate::assert_number(alpha, lower = 0, upper = 1)
	  checkmate::assert_string(idcol)
	  checkmate::assert_flag(progress)
	  checkmate::assert_int(sample_fold_n, lower = 1)
	  sample_fold_n <- as.integer(sample_fold_n)

	  x <- rast_to_sf(x)
	  # Fail early with a clear message instead of handing `NULL` to `f` later.
	  checkmate::assert_choice(target_col, names(x))

	  # `row_id` only serves as a placeholder target; the task is needed solely
	  # to build the spatial partitioning.
	  x_tsk <- mlr3spatiotempcv::as_task_regr_st(
	    x,
	    target = "row_id"
	  )
	  # Instantiate Resampling
	  rcv <- mlr3::rsmp("spcv_coords", folds = n_folds)
	  rcv$instantiate(x_tsk)

	  # Drop the plotting sub-sample size when the average fold is smaller.
	  if (nrow(x) / n_folds < sample_fold_n) {
	    sample_fold_n <- NULL
	  }

	  # ggplot2 is not part of the roxygen imports, so qualify its functions.
	  fold_plot <- suppressMessages(
	    ggplot2::autoplot(rcv, x_tsk, sample_fold_n = sample_fold_n) +
	      ggplot2::scale_colour_viridis_d() +
	      ggplot2::theme_light() +
	      ggplot2::theme(legend.position = "none")
	  )

	  # Attach the fold assignment of the resampling instance to every point.
	  x_sf_folds <- merge(
	    data.table::as.data.table(x), rcv$instance,
	    by = "row_id"
	  )
	  boot_vec <- spatial_block_bootstrap(
	    x_sf_folds, target_col, f, n_bootstrap, idcol, progress
	  )

	  boot_stat <- mean(boot_vec)

	  # Percentile interval of the bootstrap distribution.
	  ci <- stats::quantile(
	    boot_vec,
	    probs = c(alpha / 2, 1 - alpha / 2),
	    names = FALSE
	  )
	  boot_stat_ci_low <- ci[[1]]
	  boot_stat_ci_high <- ci[[2]]

	  sp_boot_obj <- list(
	    boot_results = boot_vec,
	    boot_stat = boot_stat,
	    boot_stat_ci_low = boot_stat_ci_low,
	    boot_stat_ci_high = boot_stat_ci_high,
	    boot_stat_ci_perc =
	      ((boot_stat_ci_high - boot_stat_ci_low) / boot_stat) * 100,
	    fold_plot = fold_plot
	  )
	  class(sp_boot_obj) <- "spatboot"
	  sp_boot_obj
	}