Last active
January 13, 2025 16:26
-
-
Save h-a-graham/35b6ee6b5fd4fac75eb87629265fb8ee to your computer and use it in GitHub Desktop.
create Harmonized Sentinel Landsat Mosaic with R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Description: This script is used to download HLS data from NASA's Earthdata | |
# STAC API and build a cloud-free composite image. | |
# ----- Functions ----- | |
# band mapping for HLS SL data | |
#' @return a named character vector of band mappings for HLS SL data | |
hlssl_band_mapping <- function() { | |
c( | |
B01 = "A", B02 = "B", B03 = "G", B04 = "R", | |
B05 = "N2", B06 = "S1", B07 = "S2", | |
B09 = "C", B10 = "T1", | |
B11 = "T2" | |
) | |
} | |
#' band mapping for HLS SS data | |
#' @return a named character vector of band mappings for HLS SS data | |
hlsss_band_mapping <- function() { | |
c( | |
B01 = "A", B02 = "B", B03 = "G", B04 = "R", | |
B05 = "RE1", B06 = "RE2", B07 = "RE3", | |
B08 = "N", B8A = "N2", B09 = "WV", | |
B10 = "C", B11 = "S1", B12 = "S2" | |
) | |
} | |
#' Mask function for HLS data | |
#' @param raster a SpatRaster object. The mask asset of a given HLS item. | |
#' @param bits a numeric vector. The bits to be used for masking. | |
#' @return a SpatRaster with logical values. | |
#' @details This function is used to mask HLS data using the Fmask band. | |
#' the bit values relate to the folloeing feautres in the Fmask band: | |
#' 0: Cirrus, 1: Cloud, 2: Adjacent to cloud/shadow, 3: Cloud shadow, | |
#' 4: Snow/ice, 5: Water, 6-7: aerosol level | |
#' The defaults are quite agressive but do a good job of removing clouds and | |
#' shadows. | |
hls_mask_fun <- function(bits = c(0, 1, 2, 3, 4, 5, 7)) { | |
function(raster) { | |
bm <- sum( | |
terra::rast(lapply( | |
bits, | |
\(b) { | |
terra::app( | |
raster, | |
function(x) bitwAnd(x, bitwShiftL(1, b)) > 0 | |
) | |
} | |
)), | |
na.rm = TRUE | |
) | |
bm == 0 | |
} | |
} | |
#' Cloud cover filter for HLS data | |
#' A function factory creating a rsi query function that filters HLS data | |
#' based on cloud cover proportion. | |
#' @param eo_cloud_cover a numeric value. The maximum cloud cover proportion to | |
#' filter HLS data by. must be between 0 and 100. | |
#' @return a function that can be used as the query_function argument in | |
#' rsi::get_stac_data. | |
hls_cloud_cover_filter <- function(eo_cloud_cover = 25) { | |
if (!rlang::is_bare_numeric(eo_cloud_cover)) { | |
rlang::abort("`eo_cloud_cover`` must be a numeric value", | |
class = "eo_cloud_cover_not_numeric" | |
) | |
} | |
if (rlang::is_false(eo_cloud_cover >= 0 && eo_cloud_cover <= 100)) { | |
rlang::abort("`eo_cloud_cover` must be between 0 and 100", | |
class = "eo_cloud_cover_out_of_range" | |
) | |
} | |
function(bbox, stac_source, collection, start_date, end_date, ...) { | |
request <- rstac::stac(stac_source) |> | |
rstac::stac_search( | |
collections = collection, | |
bbox = bbox[1:4], | |
datetime = paste(start_date, end_date, sep = "/") | |
) | |
its <- rstac::items_fetch(rstac::post_request(request)) |> | |
rstac::items_filter(filter_fn = function(x) { | |
x$properties$`eo:cloud_cover` <= eo_cloud_cover | |
}) | |
if (rstac::items_length(its) == 0) { | |
cli::cli_abort("No items found with cloud cover <= {eo_cloud_cover}") | |
} else { | |
cli::cli_alert_info( | |
"Found {rstac::items_length(its)} items | |
with cloud cover < {eo_cloud_cover}" | |
) | |
} | |
return(its) | |
} | |
} | |
#' GDAL warp options for HLS data | |
#' @return a named character vector of GDAL warp options | |
#' @details These options are used to configure the GDAL warp process for | |
#' HLS data. The only difference between this and `rsi::rsi_gdalwarp_options()` | |
#' is the exclusion of -multi which seems to unpredictably result in the | |
#' warper hanging during the data download. | |
rsi_ned_gdalwarp_options <- function() { | |
c( | |
"-r", "bilinear", | |
"-overwrite", | |
"-co", "PREDICTOR=2", | |
"-co", "COMPRESS=DEFLATE" | |
) | |
} | |
#' GDAL config options for downloading HLS data | |
#' @return a named character vector of GDAL config options | |
#' @details These options are used to configure the GDAL environment for | |
#' downloading HLS data. The options are set to optimize performance and | |
#' reduce the likelihood of errors. They differ slightly from | |
#' `rsi::rsi_gdal_config_options()`` | |
rsi_ned_gdal_config_options <- function() { | |
c( | |
VSI_CACHE = "TRUE", | |
GDAL_CACHEMAX = "30%", | |
GDAL_HTTP_MULTIPLEX = "YES", | |
GDAL_INGESTED_BYTES_AT_OPEN = "32000", | |
GDAL_DISABLE_READDIR_ON_OPEN = "EMPTY_DIR", | |
GDAL_HTTP_VERSION = "2", | |
GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = "YES", | |
GDAL_HTTP_USERAGENT = | |
"rsi (https://permian-global-research.github.io/rsi/)", | |
GDAL_HTTP_HEADERS = glue::glue( | |
"Authorization: Bearer {Sys.getenv('EARTHDATA_TOKEN')}" | |
), | |
GDAL_HTTP_TIMEOUT = "60", | |
GDAL_HTTP_MAX_RETRY = "10", | |
GDAL_HTTP_RETRY_DELAY = "0.5", | |
GDAL_MAX_DATASET_POOL_SIZE = "100", | |
CPL_VSIL_CURL_CHUNK_SIZE = "10485760", | |
CPL_VSIL_CURL_CACHE_SIZE = "1342177280", | |
VSI_CACHE_SIZE = "100000000" | |
) | |
} | |
#' check that extents of rasters match | |
#' @param ... SpatRaster objects. The first raster is used as the | |
#' reference | |
#' @return NULL if all extents match, otherwise the index of the | |
#' raster with a different extent. | |
extents_match <- function(...) { | |
dots <- rlang::list2(...) | |
exts <- lapply(dots, \(x) terra::ext(x)[]) | |
match_log <- sapply(exts, \(x) all(x == exts[[1]])) | |
match_idx <- which(!match_log) | |
if (length(match_idx) == 0) { | |
return(NULL) | |
} else { | |
return(match_idx) | |
} | |
} | |
#' Build a mosaic from a list of rasters and specified band names | |
#' @param rlist a character vector of raster file paths | |
#' @param bands a character vector of band names to extract from the rasters | |
#' @return a SpatRaster object | |
structured_mosaic <- function(rlist, bands, label) { | |
if (!is.character(rlist)) rlang::abort("`rlist` must be a character vector") | |
if (!is.character(bands)) rlang::abort("`bands` must be a character vector") | |
rl <- lapply(rlist, \(x) terra::rast(x)[[bands]]) | |
nbands <- terra::nlyr(rl[[1]]) | |
if (!all(sapply(rl, terra::nlyr) == nbands)) { | |
rlang::abort("rasters do not have the same number of layers", | |
class = "raster_mosaic_layers_diff" | |
) | |
} | |
l <- lapply(seq_along(1:nbands), \(i) { | |
lapply(rl, \(x) x[[i]]) | |
}) | |
capture.output({ | |
m <- lapply( | |
cli::cli_progress_along(l, name = glue::glue("Building {label} mosaic")), | |
\(i) { | |
terra::mosaic( | |
terra::sprc(l[[i]]), | |
fun = "median" | |
) | |
} | |
) | |
}) | |
return(terra::rast(m)) | |
} | |
#' Combine HLS data from Sentinel-2 and Landsat-8 | |
#' @param sl_paths a character vector of Landsat-8 HLS file paths | |
#' @param ss_paths a character vector of Sentinel-2 HLS file paths | |
#' @param output_filename a character string. The output filename | |
#' for the combined HLS data. | |
#' @return a SpatRaster object | |
#' | |
hls_combine <- function(sl_paths, ss_paths, output_filename) { | |
# get names for unique and shared bands between sentinel and landasat | |
hlsss_unique <- setdiff(hlsss_band_mapping(), hlssl_band_mapping()) | |
hlssl_unique <- setdiff(hlssl_band_mapping(), hlsss_band_mapping()) | |
hls_shared <- intersect(hlssl_band_mapping(), hlsss_band_mapping()) | |
shared <- structured_mosaic( | |
c(sl_paths, ss_paths), hls_shared, "HLS shared" | |
) | |
sl_unique <- structured_mosaic( | |
sl_paths, hlssl_unique, "HLS Landsat bands" | |
) | |
ss_unique <- structured_mosaic( | |
ss_paths, hlsss_unique, "HLS Sentinel bands" | |
) | |
check_exts <- extents_match(shared, sl_unique, ss_unique) | |
raw_r_list <- list(shared, sl_unique, ss_unique) | |
if (!is.null(check_exts)) { | |
r_no_match <- raw_r_list[check_exts] | |
reproj_r <- lapply(r_no_match, \(x) terra::project(x, shared)) | |
raw_r_list[check_exts] <- reproj_r | |
} | |
if (!is.null( | |
extents_match(raw_r_list[[1]], raw_r_list[[2]], raw_r_list[[3]]) | |
)) { | |
rlang::abort("HLS raster alignment issue", class = "hls_raster_alignment") | |
} | |
terra::subset( | |
terra::rast(raw_r_list), | |
c( | |
"A", "B", "G", "R", "RE1", "RE2", "RE3", "N", "N2", "S1", "S2", | |
"WV", "C", "T1", "T2" | |
), | |
filename = output_filename, | |
overwrite = TRUE | |
) | |
} | |
#' Project an sf/sfc object to a generic projected coordinate system | |
#' @param x an sf or sfc object | |
#' @param proj a character vector. The projection to use. Either "utm" or "laea" | |
#' @param ellps a character vector. The ellipsoid to use. Select from | |
#' `sf_proj_info(type = "ellps")`. | |
#' @param opts a character vector. Additional proj options to pass to the | |
#' proj string. see details for more information. | |
#' @return an sf or sfc object | |
#' @details For further info about the available "generic" projects see: | |
#' for UTM: \url{https://proj.org/en/9.4/operations/projections/utm.html} | |
#' for LAEA: \url{https://proj.org/en/9.4/operations/projections/laea.html} | |
to_generic_projected <- function( | |
x, | |
proj = c("utm", "laea"), | |
ellps = "WGS84", | |
no_defs = TRUE, | |
opts = "") { | |
# arg assertions | |
if (!rlang::is_true(rlang::inherits_any(x, c("sf", "sfc")))) { | |
rlang::abort( | |
"`x` must be either an sf object or an sfc object", | |
class = "to_generic_projected_incorrect_input" | |
) | |
} | |
proj <- rlang::arg_match(proj) | |
ellps <- rlang::arg_match(ellps, sf::sf_proj_info(type = "ellps")$name) | |
if (!rlang::is_logical(no_defs)) { | |
rlang::abort("`no_defs` must be a logical value", | |
class = "to_generic_projected_incorrect_input" | |
) | |
} | |
if (!rlang::is_character(opts) && !nchar(opts)) { | |
rlang::abort("`opts` must be a character vector", | |
class = "to_generic_projected_incorrect_input" | |
) | |
} | |
# check if the input is longlat | |
if (!sf::st_is_longlat(x)) { | |
x <- sf::st_transform(x, 4326) | |
} | |
cent_coor <- sf::st_coordinates(sf::st_centroid(x)) | |
n_or_s <- ifelse(cent_coor[2] == 0, "", | |
ifelse(cent_coor[2] > 0, "+north", "+south") | |
) | |
no_defs <- ifelse(no_defs, "+no_defs", "") | |
prj <- switch(proj, | |
laea = glue::glue( | |
"+proj=laea +lon_0={cent_coor[1]} +lat_0={cent_coor[2]}", | |
"+ellps={ellps} {no_defs}", | |
opts, | |
.sep = " " | |
), | |
utm = glue::glue( | |
"+proj=utm +zone={round((180 + cent_coor[1]) / 6)} {n_or_s}", | |
"+ellps={ellps} {no_defs}", | |
opts, | |
.sep = " " | |
) | |
) | |
return(sf::st_transform(x, prj)) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("R/hls-functions.R") | |
# ----- Main ----- | |
# shared args | |
target_name <- "hls-chile-utm" | |
if (!dir.exists(target_name)) dir.create(target_name) | |
start_date <- "2024-01-01" | |
end_date <- "2024-02-29" | |
cloud_cover <- 5 | |
# select aoi centroid. | |
# centroid <- c(87.8438, 69.8) | |
# centroid <- c(170.767, 68.729) | |
centroid <- c(-73.26, -46.0265) | |
# build the aoi | |
ll <- centroid - c(0.2, 0.1) | |
ur <- centroid + c(0.2, 0.1) | |
aoi <- sf::st_bbox( | |
c( | |
xmin = ll[1], ymin = ll[2], | |
xmax = ur[1], ymax = ur[2] | |
), | |
crs = "EPSG:4326" | |
) |> | |
sf::st_as_sfc() |> | |
to_generic_projected("utm") | |
# mapview::mapview(sf::st_as_sf(aoi)) | |
# get Landsat data | |
hlssl_paths <- rsi::get_stac_data( | |
aoi = aoi, | |
start_date = start_date, | |
end_date = end_date, | |
pixel_x_size = 30, | |
pixel_y_size = 30, | |
asset_names = hlssl_band_mapping(), | |
stac_source = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/", | |
collection = "HLSL30.v2.0", | |
rescale_bands = FALSE, | |
query_function = hls_cloud_cover_filter(eo_cloud_cover = cloud_cover), | |
output_filename = file.path( | |
target_name, | |
glue::glue("{target_name}-HLS-SL-cc25.tif") | |
), | |
mask_band = NULL, # "Fmask", | |
mask_function = NULL, # hls_mask_fun(c(7)), | |
composite_function = NULL, | |
gdalwarp_options = rsi_ned_gdalwarp_options(), | |
gdal_config_options = rsi_ned_gdal_config_options() | |
) | |
# get Sentinel-2 data | |
hlsss_paths <- rsi::get_stac_data( | |
aoi = aoi, | |
start_date = start_date, | |
end_date = end_date, | |
pixel_x_size = 30, | |
pixel_y_size = 30, | |
asset_names = hlsss_band_mapping(), | |
stac_source = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/", | |
collection = "HLSS30.v2.0", | |
rescale_bands = FALSE, | |
query_function = hls_cloud_cover_filter(eo_cloud_cover = cloud_cover), | |
output_filename = file.path( | |
target_name, | |
glue::glue("{target_name}-HLS-SS-cc25.tif") | |
), | |
mask_band = NULL, # "Fmask", | |
mask_function = NULL, # hls_mask_fun(c(7)), | |
composite_function = NULL, | |
gdalwarp_options = rsi_ned_gdalwarp_options(), | |
gdal_config_options = rsi_ned_gdal_config_options() | |
) | |
# only needed if something goes wrong with the above... | |
hlssl_paths <- list.files(target_name, pattern = "HLS-SL", full.names = TRUE) | |
hlsss_paths <- list.files(target_name, pattern = "HLS-SS", full.names = TRUE) | |
comp_rast <- hls_combine( | |
hlssl_paths, hlsss_paths, | |
glue::glue("{target_name}/{target_name}-HLS-mosaic-laea.tif") | |
) | |
# plotting | |
stretch_ras <- terra::stretch(comp_rast, minq = 0.1, maxq = 0.9) | |
nr <- terra::nrow(stretch_ras) | |
nc <- terra::ncol(stretch_ras) | |
{ | |
png(glue::glue("{target_name}.png"), | |
width = 2000, height = (2000 * nr / nc) * 2, | |
res = 600 | |
) | |
par(mfrow = c(2, 1)) | |
terra::plotRGB(stretch_ras, | |
r = 5, g = 4, b = 3, | |
colNA = "black", | |
smooth = TRUE | |
) | |
terra::plotRGB(stretch_ras, | |
r = 4, g = 3, b = 2, | |
# stretch = "lin", | |
colNA = "black", | |
smooth = TRUE | |
) | |
dev.off() | |
} | |
terra::plot(comp_rast, | |
col = hcl.colors(100, "Viridis"), colNA = "red" | |
) |
Author
h-a-graham
commented
Oct 17, 2024
updated to make more functional - still need to add some logic to allow zero images from one or other of the collections - this in terms of thinking about moving towards a single high level function for HLS.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment