@h-a-graham
Last active February 16, 2025 07:21
Create a composite raster using GDAL's VRT and warp via {gdalraster}
#' Create a composite raster with a VRT pixel function
#' @param src_files a character vector with the paths to the source rasters.
#' @param outfile a character path to the output raster.
#' @param fun a character with the pixel function to apply.
#' @param t_srs a character with the target SRS. If an empty string (""), the
#' spatial reference of src_files[1] is used.
#' @param vrt_options a character vector with additional options to pass to the
#' gdalbuildVRT command.
#' @param warp_options a character vector with additional options to pass to the
#' gdalwarp command.
#' @param config_options a named character vector with additional GDAL configuration
#' options.
#' @param quiet a logical indicating whether to suppress progress bar.
#' @return a character path to the output raster.
vrt_composite <- function(
    src_files,
    outfile,
    fun = c("median", "mean"),
    t_srs = "",
    vrt_options = NULL,
    warp_options = c(
      "-multi",
      "-overwrite",
      "-co", "COMPRESS=DEFLATE",
      "-co", "PREDICTOR=2",
      "-co", "NUM_THREADS=ALL_CPUS"
    ),
    config_options = c(
      GDAL_VRT_ENABLE_PYTHON = "YES",
      VSI_CACHE = "TRUE",
      GDAL_CACHEMAX = "30%",
      VSI_CACHE_SIZE = "10000000",
      GDAL_HTTP_MULTIPLEX = "YES",
      GDAL_INGESTED_BYTES_AT_OPEN = "32000",
      GDAL_DISABLE_READDIR_ON_OPEN = "EMPTY_DIR",
      GDAL_HTTP_VERSION = "2",
      GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = "YES",
      GDAL_NUM_THREADS = "ALL_CPUS"
    ),
    quiet = FALSE) {
  fun <- rlang::arg_match(fun)

  # apply the GDAL configuration options
  purrr::iwalk(
    config_options,
    ~ gdalraster::set_config_option(.y, .x)
  )

  # build an initial VRT mosaic of the source rasters
  init_vrt_path <- tempfile(fileext = ".vrt")
  gdalraster::buildVRT(
    init_vrt_path,
    src_files,
    cl_arg = vrt_options,
    quiet = TRUE
  )

  # convert each band to a derived band with a Python pixel function
  tvrt <- xml2::read_xml(init_vrt_path)
  bands <- xml2::xml_find_all(tvrt, "//VRTRasterBand")
  purrr::walk(bands, function(x) {
    xml2::xml_set_attr(x, "subClass", "VRTDerivedRasterBand")
    # add pixel function elements, named after the chosen summary function
    xml2::xml_add_child(x, "PixelFunctionType", fun)
    xml2::xml_add_child(x, "PixelFunctionLanguage", "Python")
    xml2::xml_add_child(x, "PixelFunctionCode", glue::glue("
import numpy as np
def {fun}(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs):
    out_ar[:] = np.nan{fun}(in_ar, axis=0)
"))
  })

  # write the modified VRT and warp it to the output raster
  composite_vrt <- tempfile(fileext = ".vrt")
  xml2::write_xml(tvrt, composite_vrt)
  gdalraster::warp(
    composite_vrt,
    outfile,
    t_srs = t_srs,
    cl_arg = warp_options,
    quiet = quiet
  )

  outfile
}
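
A minimal usage sketch (the /vsicurl/ scene URLs below are hypothetical placeholders; any overlapping set of GDAL-readable rasters works the same way):

## hypothetical example: median-composite three overlapping scenes
## (the paths are placeholders, not real data)
scenes <- c(
  "/vsicurl/https://example.com/scene_2024-02-01.tif",
  "/vsicurl/https://example.com/scene_2024-02-11.tif",
  "/vsicurl/https://example.com/scene_2024-02-21.tif"
)
composite <- vrt_composite(
  scenes,
  outfile = file.path(tempdir(), "composite.tif"),
  fun = "median"
)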
@mdsumner

I'm doing something entirely wrong now, but I did fix a lot of things mentioned above, so here's the code for the record.

## we aren't doing random numbers, and we aren't scared of crashing in RStudio
options(parallelly.fork.enable = TRUE, future.rng.onMisuse = "ignore")
library(furrr); plan(multicore)

library(terra)
library(reproj)

library(vaster) ## remotes::install_github(c("hypertidy/vaster", "hypertidy/grout"))
library(grout)
library(sds)

# loc_crs <- "+proj=laea +lon_0=147 +lat_0=-42"
# loc_ex <- c(-1, 1, -1, 1) * 20000
loc_crs <- "+proj=laea +lon_0=-63.8 +lat_0=-8.8"
loc_ex <- c(-1, 1, -1, 1) * 5000

ex <- reproj::reproj_extent(loc_ex, "EPSG:4326", source = loc_crs)
srcurl <- sds::stacit(ex, date = c("2024-02-01", "2024-02-29"))
src <- jsonlite::fromJSON(srcurl)
## there are 10s of these over 2 months, so think of blocksize below * this length
#src$features$assets$red$href


rgb <- cbind(src$features$assets$red$href, 
             src$features$assets$green$href, 
             src$features$assets$blue$href)

in_dsn <- replicate(nrow(rgb), tempfile(fileext = ".vrt", tmpdir = "./scratch"))
listof <- vector("list", nrow(rgb))
for (j in seq_along(in_dsn)) {
  listof[[j]] <- sprintf("/vsicurl/%s", rgb[j, , drop = TRUE])
}



future_walk2(in_dsn, listof, \(.x, .y) gdalraster::buildVRT(.x, .y, cl_arg = c("-separate")))


## output tiling (this is what we loop over, use parallel for the multiple inputs at each tile)
##outdim <- c(2048, 2048)  ## remember we need to have the dim match the output so we could use res here 
## but rast() can snap for us
template <- rast(ext(loc_ex), res = 20)
outdim <- dim(template)[2:1]
print(outdim)

tiles <- grout::tile_index(grout::grout(outdim, loc_ex, blocksize = c(512, 512)))
to_rgb_table <- function(x) {
  colnames(x) <- c("red", "green", "blue")
  tibble::as_tibble(x)
}
wfun <- function(.x, .y, .z, te, t_srs, ts) {
  gdalraster::warp(.x, .y, t_srs = t_srs, cl_arg = c("-te", te, "-ts", ts, "-ot", "Int16"))
 # browser()
  terra::values(terra::rast(.y, raw = TRUE), dataframe = TRUE) |>
    tibble::as_tibble() |>
    setNames(c("red", "green", "blue")) |>
    dplyr::mutate(cell = dplyr::row_number()) |>
    arrow::write_parquet(.z)
  .z  
}

vaster::plot_extent(loc_ex, asp = 1)
for (i in seq_len(nrow(tiles))) {
  dst_files <- replicate(length(in_dsn), tempfile(fileext = ".tif", tmpdir = "./scratch"))
  
  parquet_files <- replicate(length(in_dsn), tempfile(fileext = ".parquet", tmpdir = "./scratch"))
  
  tile <- tiles[i, ]
  te <- unlist(tile[c("xmin", "ymin", "xmax", "ymax")])

  future_pmap(list(.x = in_dsn, .y = dst_files, .z = parquet_files), wfun,  
            te = te, t_srs = loc_crs, ts = unlist(tile[c("ncol", "nrow")]))
  ## this is the fastest way to group by on pixel cells, from fragmented parquet on disk
  med <- duckdbfs::open_dataset(parquet_files) |>
    dplyr::group_by(cell) |>
    dplyr::summarise(
      red = stats::median(red, na.rm = TRUE),
      green = stats::median(green, na.rm = TRUE),
      blue = stats::median(blue, na.rm = TRUE)
    ) |>
    dplyr::arrange(cell) |>
    dplyr::select(red, green, blue) |>
    dplyr::collect()

  ## these come out in 0,1 (and close to 0.45)
  scaled <- tibble::as_tibble(
    lapply(med[c("red", "green", "blue")], scales::rescale,
           from = c(4500, 8000), to = c(0, 255))
  )
  clamp <- function(x) {
    x[x < 0] <- 0
    x[x > 255] <- 255
    x
  }
  for (ii in 1:3) scaled[[ii]] <- clamp(scaled[[ii]])
  ## any of this tile's warped outputs provides the template grid
  ## (dst_files is indexed per input scene, not per tile)
  r <- setValues(rast(dst_files[1], raw = TRUE), scaled)
  plotRGB(r, add = TRUE) 
  print(i)
} 


@mdsumner

PS: don't run this code without care; clear out ./scratch each time. I was way too blithe and had problems with a very long list of files in a directory ...
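
A minimal cleanup sketch along those lines (assumes ./scratch holds only throwaway intermediates from this script):

## remove intermediate VRT / TIFF / parquet files between runs
unlink(list.files("./scratch", full.names = TRUE))
## or recreate the directory wholesale
unlink("./scratch", recursive = TRUE)
dir.create("./scratch")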

@h-a-graham (Author) commented Feb 16, 2025

Thanks so much for all of this, will dig into it more ASAP!
