Skip to content

Instantly share code, notes, and snippets.

@h-a-graham
Last active January 13, 2025 16:26
Show Gist options
  • Save h-a-graham/35b6ee6b5fd4fac75eb87629265fb8ee to your computer and use it in GitHub Desktop.
Save h-a-graham/35b6ee6b5fd4fac75eb87629265fb8ee to your computer and use it in GitHub Desktop.
create Harmonized Sentinel Landsat Mosaic with R
# Description: This script is used to download HLS data from NASA's Earthdata
# STAC API and build a cloud-free composite image.
# ----- Functions -----
# band mapping for HLS SL data
#' @return a named character vector of band mappings for HLS SL data
hlssl_band_mapping <- function() {
c(
B01 = "A", B02 = "B", B03 = "G", B04 = "R",
B05 = "N2", B06 = "S1", B07 = "S2",
B09 = "C", B10 = "T1",
B11 = "T2"
)
}
#' band mapping for HLS SS data
#' @return a named character vector of band mappings for HLS SS data
hlsss_band_mapping <- function() {
c(
B01 = "A", B02 = "B", B03 = "G", B04 = "R",
B05 = "RE1", B06 = "RE2", B07 = "RE3",
B08 = "N", B8A = "N2", B09 = "WV",
B10 = "C", B11 = "S1", B12 = "S2"
)
}
#' Mask function for HLS data
#' @param raster a SpatRaster object. The mask asset of a given HLS item.
#' @param bits a numeric vector. The bits to be used for masking.
#' @return a SpatRaster with logical values.
#' @details This function is used to mask HLS data using the Fmask band.
#' the bit values relate to the folloeing feautres in the Fmask band:
#' 0: Cirrus, 1: Cloud, 2: Adjacent to cloud/shadow, 3: Cloud shadow,
#' 4: Snow/ice, 5: Water, 6-7: aerosol level
#' The defaults are quite agressive but do a good job of removing clouds and
#' shadows.
hls_mask_fun <- function(bits = c(0, 1, 2, 3, 4, 5, 7)) {
function(raster) {
bm <- sum(
terra::rast(lapply(
bits,
\(b) {
terra::app(
raster,
function(x) bitwAnd(x, bitwShiftL(1, b)) > 0
)
}
)),
na.rm = TRUE
)
bm == 0
}
}
#' Cloud cover filter for HLS data
#' A function factory creating a rsi query function that filters HLS data
#' based on cloud cover proportion.
#' @param eo_cloud_cover a numeric value. The maximum cloud cover proportion to
#' filter HLS data by. must be between 0 and 100.
#' @return a function that can be used as the query_function argument in
#' rsi::get_stac_data.
hls_cloud_cover_filter <- function(eo_cloud_cover = 25) {
if (!rlang::is_bare_numeric(eo_cloud_cover)) {
rlang::abort("`eo_cloud_cover`` must be a numeric value",
class = "eo_cloud_cover_not_numeric"
)
}
if (rlang::is_false(eo_cloud_cover >= 0 && eo_cloud_cover <= 100)) {
rlang::abort("`eo_cloud_cover` must be between 0 and 100",
class = "eo_cloud_cover_out_of_range"
)
}
function(bbox, stac_source, collection, start_date, end_date, ...) {
request <- rstac::stac(stac_source) |>
rstac::stac_search(
collections = collection,
bbox = bbox[1:4],
datetime = paste(start_date, end_date, sep = "/")
)
its <- rstac::items_fetch(rstac::post_request(request)) |>
rstac::items_filter(filter_fn = function(x) {
x$properties$`eo:cloud_cover` <= eo_cloud_cover
})
if (rstac::items_length(its) == 0) {
cli::cli_abort("No items found with cloud cover <= {eo_cloud_cover}")
} else {
cli::cli_alert_info(
"Found {rstac::items_length(its)} items
with cloud cover < {eo_cloud_cover}"
)
}
return(its)
}
}
#' GDAL warp options for HLS data
#' @return a named character vector of GDAL warp options
#' @details These options are used to configure the GDAL warp process for
#' HLS data. The only difference between this and `rsi::rsi_gdalwarp_options()`
#' is the exclusion of -multi which seems to unpredictably result in the
#' warper hanging during the data download.
rsi_ned_gdalwarp_options <- function() {
c(
"-r", "bilinear",
"-overwrite",
"-co", "PREDICTOR=2",
"-co", "COMPRESS=DEFLATE"
)
}
#' GDAL config options for downloading HLS data
#' @return a named character vector of GDAL config options
#' @details These options are used to configure the GDAL environment for
#' downloading HLS data. The options are set to optimize performance and
#' reduce the likelihood of errors. They differ slightly from
#' `rsi::rsi_gdal_config_options()``
rsi_ned_gdal_config_options <- function() {
c(
VSI_CACHE = "TRUE",
GDAL_CACHEMAX = "30%",
GDAL_HTTP_MULTIPLEX = "YES",
GDAL_INGESTED_BYTES_AT_OPEN = "32000",
GDAL_DISABLE_READDIR_ON_OPEN = "EMPTY_DIR",
GDAL_HTTP_VERSION = "2",
GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = "YES",
GDAL_HTTP_USERAGENT =
"rsi (https://permian-global-research.github.io/rsi/)",
GDAL_HTTP_HEADERS = glue::glue(
"Authorization: Bearer {Sys.getenv('EARTHDATA_TOKEN')}"
),
GDAL_HTTP_TIMEOUT = "60",
GDAL_HTTP_MAX_RETRY = "10",
GDAL_HTTP_RETRY_DELAY = "0.5",
GDAL_MAX_DATASET_POOL_SIZE = "100",
CPL_VSIL_CURL_CHUNK_SIZE = "10485760",
CPL_VSIL_CURL_CACHE_SIZE = "1342177280",
VSI_CACHE_SIZE = "100000000"
)
}
#' check that extents of rasters match
#' @param ... SpatRaster objects. The first raster is used as the
#' reference
#' @return NULL if all extents match, otherwise the index of the
#' raster with a different extent.
extents_match <- function(...) {
dots <- rlang::list2(...)
exts <- lapply(dots, \(x) terra::ext(x)[])
match_log <- sapply(exts, \(x) all(x == exts[[1]]))
match_idx <- which(!match_log)
if (length(match_idx) == 0) {
return(NULL)
} else {
return(match_idx)
}
}
#' Build a mosaic from a list of rasters and specified band names
#' @param rlist a character vector of raster file paths
#' @param bands a character vector of band names to extract from the rasters
#' @return a SpatRaster object
structured_mosaic <- function(rlist, bands, label) {
if (!is.character(rlist)) rlang::abort("`rlist` must be a character vector")
if (!is.character(bands)) rlang::abort("`bands` must be a character vector")
rl <- lapply(rlist, \(x) terra::rast(x)[[bands]])
nbands <- terra::nlyr(rl[[1]])
if (!all(sapply(rl, terra::nlyr) == nbands)) {
rlang::abort("rasters do not have the same number of layers",
class = "raster_mosaic_layers_diff"
)
}
l <- lapply(seq_along(1:nbands), \(i) {
lapply(rl, \(x) x[[i]])
})
capture.output({
m <- lapply(
cli::cli_progress_along(l, name = glue::glue("Building {label} mosaic")),
\(i) {
terra::mosaic(
terra::sprc(l[[i]]),
fun = "median"
)
}
)
})
return(terra::rast(m))
}
#' Combine HLS data from Sentinel-2 and Landsat-8
#' @param sl_paths a character vector of Landsat-8 HLS file paths
#' @param ss_paths a character vector of Sentinel-2 HLS file paths
#' @param output_filename a character string. The output filename
#' for the combined HLS data.
#' @return a SpatRaster object
#'
hls_combine <- function(sl_paths, ss_paths, output_filename) {
# get names for unique and shared bands between sentinel and landasat
hlsss_unique <- setdiff(hlsss_band_mapping(), hlssl_band_mapping())
hlssl_unique <- setdiff(hlssl_band_mapping(), hlsss_band_mapping())
hls_shared <- intersect(hlssl_band_mapping(), hlsss_band_mapping())
shared <- structured_mosaic(
c(sl_paths, ss_paths), hls_shared, "HLS shared"
)
sl_unique <- structured_mosaic(
sl_paths, hlssl_unique, "HLS Landsat bands"
)
ss_unique <- structured_mosaic(
ss_paths, hlsss_unique, "HLS Sentinel bands"
)
check_exts <- extents_match(shared, sl_unique, ss_unique)
raw_r_list <- list(shared, sl_unique, ss_unique)
if (!is.null(check_exts)) {
r_no_match <- raw_r_list[check_exts]
reproj_r <- lapply(r_no_match, \(x) terra::project(x, shared))
raw_r_list[check_exts] <- reproj_r
}
if (!is.null(
extents_match(raw_r_list[[1]], raw_r_list[[2]], raw_r_list[[3]])
)) {
rlang::abort("HLS raster alignment issue", class = "hls_raster_alignment")
}
terra::subset(
terra::rast(raw_r_list),
c(
"A", "B", "G", "R", "RE1", "RE2", "RE3", "N", "N2", "S1", "S2",
"WV", "C", "T1", "T2"
),
filename = output_filename,
overwrite = TRUE
)
}
#' Project an sf/sfc object to a generic projected coordinate system
#' @param x an sf or sfc object
#' @param proj a character vector. The projection to use. Either "utm" or "laea"
#' @param ellps a character vector. The ellipsoid to use. Select from
#' `sf_proj_info(type = "ellps")`.
#' @param opts a character vector. Additional proj options to pass to the
#' proj string. see details for more information.
#' @return an sf or sfc object
#' @details For further info about the available "generic" projects see:
#' for UTM: \url{https://proj.org/en/9.4/operations/projections/utm.html}
#' for LAEA: \url{https://proj.org/en/9.4/operations/projections/laea.html}
to_generic_projected <- function(
x,
proj = c("utm", "laea"),
ellps = "WGS84",
no_defs = TRUE,
opts = "") {
# arg assertions
if (!rlang::is_true(rlang::inherits_any(x, c("sf", "sfc")))) {
rlang::abort(
"`x` must be either an sf object or an sfc object",
class = "to_generic_projected_incorrect_input"
)
}
proj <- rlang::arg_match(proj)
ellps <- rlang::arg_match(ellps, sf::sf_proj_info(type = "ellps")$name)
if (!rlang::is_logical(no_defs)) {
rlang::abort("`no_defs` must be a logical value",
class = "to_generic_projected_incorrect_input"
)
}
if (!rlang::is_character(opts) && !nchar(opts)) {
rlang::abort("`opts` must be a character vector",
class = "to_generic_projected_incorrect_input"
)
}
# check if the input is longlat
if (!sf::st_is_longlat(x)) {
x <- sf::st_transform(x, 4326)
}
cent_coor <- sf::st_coordinates(sf::st_centroid(x))
n_or_s <- ifelse(cent_coor[2] == 0, "",
ifelse(cent_coor[2] > 0, "+north", "+south")
)
no_defs <- ifelse(no_defs, "+no_defs", "")
prj <- switch(proj,
laea = glue::glue(
"+proj=laea +lon_0={cent_coor[1]} +lat_0={cent_coor[2]}",
"+ellps={ellps} {no_defs}",
opts,
.sep = " "
),
utm = glue::glue(
"+proj=utm +zone={round((180 + cent_coor[1]) / 6)} {n_or_s}",
"+ellps={ellps} {no_defs}",
opts,
.sep = " "
)
)
return(sf::st_transform(x, prj))
}
source("R/hls-functions.R")
# ----- Main -----
# shared args
target_name <- "hls-chile-utm"
if (!dir.exists(target_name)) dir.create(target_name)
start_date <- "2024-01-01"
end_date <- "2024-02-29"
cloud_cover <- 5
# select aoi centroid.
# centroid <- c(87.8438, 69.8)
# centroid <- c(170.767, 68.729)
centroid <- c(-73.26, -46.0265)
# build the aoi
ll <- centroid - c(0.2, 0.1)
ur <- centroid + c(0.2, 0.1)
aoi <- sf::st_bbox(
c(
xmin = ll[1], ymin = ll[2],
xmax = ur[1], ymax = ur[2]
),
crs = "EPSG:4326"
) |>
sf::st_as_sfc() |>
to_generic_projected("utm")
# mapview::mapview(sf::st_as_sf(aoi))
# get Landsat data
hlssl_paths <- rsi::get_stac_data(
aoi = aoi,
start_date = start_date,
end_date = end_date,
pixel_x_size = 30,
pixel_y_size = 30,
asset_names = hlssl_band_mapping(),
stac_source = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/",
collection = "HLSL30.v2.0",
rescale_bands = FALSE,
query_function = hls_cloud_cover_filter(eo_cloud_cover = cloud_cover),
output_filename = file.path(
target_name,
glue::glue("{target_name}-HLS-SL-cc25.tif")
),
mask_band = NULL, # "Fmask",
mask_function = NULL, # hls_mask_fun(c(7)),
composite_function = NULL,
gdalwarp_options = rsi_ned_gdalwarp_options(),
gdal_config_options = rsi_ned_gdal_config_options()
)
# get Sentinel-2 data
hlsss_paths <- rsi::get_stac_data(
aoi = aoi,
start_date = start_date,
end_date = end_date,
pixel_x_size = 30,
pixel_y_size = 30,
asset_names = hlsss_band_mapping(),
stac_source = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/",
collection = "HLSS30.v2.0",
rescale_bands = FALSE,
query_function = hls_cloud_cover_filter(eo_cloud_cover = cloud_cover),
output_filename = file.path(
target_name,
glue::glue("{target_name}-HLS-SS-cc25.tif")
),
mask_band = NULL, # "Fmask",
mask_function = NULL, # hls_mask_fun(c(7)),
composite_function = NULL,
gdalwarp_options = rsi_ned_gdalwarp_options(),
gdal_config_options = rsi_ned_gdal_config_options()
)
# only needed if something goes wrong with the above...
hlssl_paths <- list.files(target_name, pattern = "HLS-SL", full.names = TRUE)
hlsss_paths <- list.files(target_name, pattern = "HLS-SS", full.names = TRUE)
comp_rast <- hls_combine(
hlssl_paths, hlsss_paths,
glue::glue("{target_name}/{target_name}-HLS-mosaic-laea.tif")
)
# plotting
stretch_ras <- terra::stretch(comp_rast, minq = 0.1, maxq = 0.9)
nr <- terra::nrow(stretch_ras)
nc <- terra::ncol(stretch_ras)
{
png(glue::glue("{target_name}.png"),
width = 2000, height = (2000 * nr / nc) * 2,
res = 600
)
par(mfrow = c(2, 1))
terra::plotRGB(stretch_ras,
r = 5, g = 4, b = 3,
colNA = "black",
smooth = TRUE
)
terra::plotRGB(stretch_ras,
r = 4, g = 3, b = 2,
# stretch = "lin",
colNA = "black",
smooth = TRUE
)
dev.off()
}
terra::plot(comp_rast,
col = hcl.colors(100, "Viridis"), colNA = "red"
)
@h-a-graham
Copy link
Author

hls-ptg

hls-ptg-panel

@h-a-graham
Copy link
Author

updated to use projected coords - removes the need for the tortuous reprojection steps at the end.
hls-ptg

@h-a-graham
Copy link
Author

updated to make more functional - still need to add some logic to allow zero images from one or other of the collections - this in terms of thinking about moving towards a single high level function for HLS.

@h-a-graham
Copy link
Author

hls-chile-utm

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment