Minimal example of a Bayesian finite sample approach for wildfires using brms
library(tidyverse)
library(sf)
library(here)
library(brms)
library(lubridate)
library(reshape2)
# Get ecoregion data ------------------------------------------------------
download.file("ftp://newftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/us_eco_l4.zip",
              destfile = "ecoregions.zip")
unzip("ecoregions.zip")

ecoregions <- st_read("us_eco_l4_no_st.shp") %>%
  st_make_valid() %>%
  # summarize to level 3 regions (assuming we don't want to go down to level 4)
  group_by(L3_KEY, L2_KEY, L1_KEY) %>%
  summarize()

ecoregions <- ecoregions %>%
  ungroup %>%
  mutate(eco_area_sq_m = st_area(ecoregions),
         eco_area_sq_km = as.numeric(eco_area_sq_m) / 1000000)

ecoregions %>%
  ggplot() +
  geom_sf()
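
# Optional sanity check (not in the original gist): the summed ecoregion area
# should be roughly the area of the conterminous US, on the order of 8
# million square kilometers.
sum(ecoregions$eco_area_sq_km)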
# Get MTBS data -----------------------------------------------------------
download.file("https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/MTBS_Fire/data/composite_data/fod_pt_shapefile/mtbs_fod_pts_data.zip",
              destfile = "mtbs.zip")
unzip("mtbs.zip")

mtbs <- st_read("mtbs_FODpoints_DD.shp") %>%
  st_transform(st_crs(ecoregions)) %>%
  # keep wildfire records (excluding prescribed burns etc.) larger than
  # 1000 acres
  filter(Incid_Type == "Wildfire",
         BurnBndAc > 1e3) %>%
  mutate(year = year(Ig_Date),
         month = month(Ig_Date))

mtbs %>%
  ggplot() +
  geom_sf()
# Find the ecoregions for each MTBS event ---------------------------------
mtbs_eco <- st_intersection(mtbs, ecoregions)

mtbs_eco %>%
  filter(grepl("GREAT PLAINS", L1_KEY)) %>%
  ggplot() +
  geom_sf(aes(color = L2_KEY))
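
# Optional check (not in the original gist): st_intersection silently drops
# points outside every ecoregion polygon and can duplicate points on
# boundaries, so this difference is an approximate count of lost events.
nrow(mtbs) - nrow(mtbs_eco)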
# Generate clean count and size data sets ----------------------------------
nonzero_counts <- mtbs_eco %>%
  as_tibble %>%
  count(year, month, L3_KEY)

ecoregion_keys <- ecoregions %>%
  as_tibble %>%
  select(ends_with("KEY"), eco_area_sq_km)

# start with a small subset of the data to reduce model run times
focal_l1_region <- "GREAT PLAINS"
max_year_train_set <- 2010
all_counts <- expand.grid(year = 1984:2018,
                          month = 1:12,
                          # unique() rather than levels() so this works whether
                          # L3_KEY was read as a factor or as character
                          L3_KEY = unique(ecoregions$L3_KEY)) %>%
  as_tibble %>%
  filter(!(year == 1984 & month == 1)) %>% # Jan 1984 is not in MTBS
  left_join(nonzero_counts) %>%
  mutate(n = ifelse(is.na(n), 0, n)) %>%
  left_join(ecoregion_keys)
train_counts <- all_counts %>%
  filter(
    grepl(focal_l1_region, L1_KEY), # start by focusing on one ecoregion
    year < max_year_train_set       # start with a subset of years
  )

train_sizes <- mtbs_eco %>%
  as_tibble %>%
  filter(
    grepl(focal_l1_region, L1_KEY), # start by focusing on one ecoregion
    year < max_year_train_set       # start with a subset of years
  )

test_sizes <- mtbs_eco %>%
  as_tibble %>%
  filter(grepl(focal_l1_region, L1_KEY)) %>%
  anti_join(train_sizes)
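
# Quick sanity check (not in the original gist): confirm the train/test split
# leaves events in both sets before fitting anything.
c(train_events = nrow(train_sizes), test_events = nrow(test_sizes))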
# Fit a count model --------------------------------------------------------
# The smoother with bs = "cc" is a cyclic spline (essentially an easy way to
# account for seasonality, though in a real model it would be better to
# include climate data instead). The log-area offset makes expected counts
# proportional to ecoregion area, i.e., a model of fires per unit area.
count_model <- brm(n ~ s(month, bs = "cc") + (1 | L2_KEY) + (1 | L3_KEY) +
                     offset(log(eco_area_sq_km)),
                   data = train_counts,
                   family = zero_inflated_negbinomial(),
                   cores = 4)
summary(count_model)
plot(conditional_effects(count_model), ask = FALSE)
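
# Optional posterior predictive check (a sketch, not in the original gist):
# a rootogram compares observed monthly fire counts against draws from the
# fitted model and is a standard diagnostic for count models.
pp_check(count_model, type = "rootogram")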
# Fit a fire size model ----------------------------------------------------
size_model <- brm(BurnBndAc ~ s(month, bs = "cc") + (1 | L2_KEY) + (1 | L3_KEY),
                  data = train_sizes,
                  family = lognormal(),
                  cores = 4)
summary(size_model)
plot(conditional_effects(size_model), ask = FALSE)
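
# Optional posterior predictive check for the size model (a sketch, not in
# the original gist); burned areas span orders of magnitude, so the default
# density overlay is easier to read on the log scale.
pp_check(size_model) + scale_x_log10()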
# Combine outputs from count and size model to infer total burn area --------
test_counts <- anti_join(all_counts, train_counts) %>%
  filter(L3_KEY %in% train_counts$L3_KEY) %>%
  mutate(test_row_index = 1:n())

# generate a set of count predictions for each row in the test data
count_predictions <- predict(count_model,
                             summary = FALSE,
                             newdata = test_counts,
                             allow_new_levels = TRUE) %>%
  reshape2::melt(varnames = c("iter", "test_row_index")) %>%
  as_tibble %>%
  filter(value > 0) # we only need to predict sizes for predicted events
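
# After melting, each row is one posterior draw for one test month x
# ecoregion, with `value` holding the predicted number of fires (not in the
# original gist; just a peek at the structure):
glimpse(count_predictions)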
## For each iteration, simulate total burned area values --------------------
# (note that this could be parallelized to speed things up, but a for loop
# may be easier to debug; a parallel sketch follows the loop below)
max_iter <- 1000
size_predictions <- list()
pb <- txtProgressBar(max = max_iter, style = 3)
for (i in 1:max_iter) {
  size_pred_df <- count_predictions %>%
    filter(iter == i) %>%
    left_join(test_counts, by = "test_row_index") %>%
    # expand to one row per predicted fire event, so a month with n predicted
    # fires gets n simulated sizes (without this, every month would get the
    # size of a single fire regardless of the predicted count)
    uncount(value)
  # simulate size values
  size_matrix <- predict(size_model,
                         newdata = size_pred_df,
                         summary = FALSE,
                         allow_new_levels = TRUE)
  # choose one posterior draw at random
  random_draw <- sample(1:nrow(size_matrix), size = 1)
  # compute a prediction for total burned area
  size_pred_df$predicted_size <- size_matrix[random_draw, ]
  size_predictions[[i]] <- size_pred_df %>%
    select(iter, test_row_index, predicted_size)
  setTxtProgressBar(pb, i)
}
close(pb)
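
# A hedged sketch of the parallel alternative mentioned above: the iterations
# are independent, so future.apply can distribute them across workers. This
# assumes the future and future.apply packages are installed (a hypothetical
# addition, not part of the original gist); run either this or the for loop,
# not both.
if (FALSE) {
  library(future.apply)
  future::plan(future::multisession)
  size_predictions <- future_lapply(seq_len(max_iter), function(i) {
    size_pred_df <- count_predictions %>%
      filter(iter == i) %>%
      left_join(test_counts, by = "test_row_index") %>%
      uncount(value)
    size_matrix <- predict(size_model,
                           newdata = size_pred_df,
                           summary = FALSE,
                           allow_new_levels = TRUE)
    random_draw <- sample(1:nrow(size_matrix), size = 1)
    size_pred_df$predicted_size <- size_matrix[random_draw, ]
    size_pred_df %>%
      select(iter, test_row_index, predicted_size)
  }, future.seed = TRUE)
}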
# Generate a clean dataset of predictions -----------------------------------
posterior_preds <- bind_rows(size_predictions) %>%
  left_join(test_counts) %>%
  group_by(iter, L3_KEY, year, month) %>%
  summarize(n_events_pred = length(predicted_size),
            total_burned_area_pred = sum(predicted_size),
            maximum_fire_size_pred = max(predicted_size)) %>%
  ungroup %>%
  # fill implicit zeros (occurs when no events are predicted)
  complete(iter, L3_KEY, year, month,
           fill = list(
             n_events_pred = 0,
             total_burned_area_pred = 0,
             maximum_fire_size_pred = 0
           )) %>%
  filter(!(year == 1984 & month == 1), # Jan 1984 is not in MTBS
         L3_KEY %in% train_counts$L3_KEY) %>%
  mutate(first_day_of_month = as.Date(paste(year,
                                            sprintf("%02d", month),
                                            "01",
                                            sep = "-")))
# Visualize predictions ------------------------------------------------------
# show one line per posterior iteration
posterior_preds %>%
  ggplot(aes(first_day_of_month, total_burned_area_pred)) +
  geom_line(aes(group = iter), alpha = .2) +
  facet_wrap(~L3_KEY, ncol = 2) +
  xlab("Date") +
  ylab("Total burned area (acres)")
# show the posterior median & 95% credible interval + actual withheld data
test_summary <- test_sizes %>%
  mutate(first_day_of_month = as.Date(paste(year,
                                            sprintf("%02d", month),
                                            "01",
                                            sep = "-"))) %>%
  group_by(L3_KEY, first_day_of_month) %>%
  summarize(total_burned_area = sum(BurnBndAc)) %>%
  ungroup
# predictions shown as ribbons, withheld data shown as points
posterior_preds %>%
  group_by(first_day_of_month, L3_KEY) %>%
  summarize(
    med = median(total_burned_area_pred),
    lo = quantile(total_burned_area_pred, .025),
    hi = quantile(total_burned_area_pred, .975)
  ) %>%
  ungroup %>%
  ggplot(aes(first_day_of_month)) +
  geom_ribbon(aes(ymin = lo, ymax = hi),
              color = NA, alpha = .4, fill = "darkred") +
  geom_line(aes(y = med)) +
  facet_wrap(~L3_KEY, ncol = 2) +
  xlab("Date") +
  ylab("Total burned area (acres)") +
  geom_point(data = test_summary,
             aes(y = total_burned_area)) +
  scale_y_log10()
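
# Optional follow-up (a sketch, not in the original gist): estimate the
# empirical coverage of the 95% intervals on the withheld months. Withheld
# months with no observed fires are treated as zero burned area.
posterior_preds %>%
  group_by(L3_KEY, first_day_of_month) %>%
  summarize(lo = quantile(total_burned_area_pred, .025),
            hi = quantile(total_burned_area_pred, .975)) %>%
  ungroup %>%
  left_join(test_summary) %>%
  mutate(total_burned_area = ifelse(is.na(total_burned_area),
                                    0, total_burned_area)) %>%
  summarize(coverage_95 = mean(total_burned_area >= lo &
                                 total_burned_area <= hi))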