Created
May 14, 2022 19:32
-
-
Save dblodgett-usgs/e6504a2ecf2d26eb2de580cc3109a9e2 to your computer and use it in GitHub Desktop.
nwis dv to netcdf-dsg with the ncdfgeom package.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sf) | |
library(dplyr) | |
library(dataRetrieval) | |
library(RNetCDF) | |
# https://water.usgs.gov/GIS/metadata/usgswrd/XML/gagesII_Sept2011.xml | |
gagesii <- sf::read_sf("R/nexus_locations/gagesII_9322_point_shapefile/gagesII_9322_sept30_2011.shp") | |
# Just look at reference gages with recent active status and more than 80 years of record. | |
ref <- dplyr::filter(gagesii, CLASS == "Ref" & ACTIVE09 == "yes" & FLYRS1900 > 80) | |
i<-1 | |
for(site in ref$STAID) { | |
file <- paste0("out/", site, ".rds") | |
if(!file.exists(file)) { | |
try( | |
# just builds a cache of data for later. | |
saveRDS(dataRetrieval::readNWISdv(site, "00060", startDate = "1900-01-01"), | |
file = file), silent = FALSE) | |
print(paste(site, i)) | |
} | |
i <- i + 1 | |
} | |
outfiles <- list.files("out/") | |
out_times <- seq(from = as.Date("1900-01-01"), | |
to = as.Date("2022-05-14 00:00"), by = "day") | |
out_data <- data.frame(matrix(numeric(0), | |
nrow = length(out_times), | |
ncol = length(outfiles))) | |
out_meta <- data.frame(matrix(character(0), | |
nrow = length(out_times), | |
ncol = length(outfiles)), stringsAsFactors = FALSE) | |
names(out_data) <- names(out_meta) <- gsub(".rds", "", outfiles) | |
max_gap <- 5 | |
# now we can step through the data and populate our big array. | |
i <- 0 | |
for(f in outfiles) { | |
dat <- readRDS(file.path("out/", f)) | |
if(nrow(dat) > 0) { | |
site <- dat$site_no[1] | |
out_dat <- approx(dat$Date, dat$X_00060_00003, out_times, rule = 1) | |
out_met <- data.frame(dateTime = out_times, seq = seq(1, length(out_times))) | |
out_met_temp <- approx(out_met$dateTime, out_met$seq, dat$Date, method = "constant") %>% | |
as.data.frame() %>% | |
left_join(select(dat, dt = Date, cd = X_00060_00003_cd), by = c("x" = "dt")) %>% | |
group_by(y) %>% | |
filter(row_number() == 1) %>% | |
ungroup() %>% select(-x) | |
out_met <- left_join(out_met, out_met_temp, by = c("seq" = "y")) %>% | |
select(-seq) | |
# the method here fills missing record but we need to remove the stuff | |
# that was interpolated in a large gap. | |
diff_time <- diff(as.numeric(dat$Date)) | |
too_big_ind <- which(diff_time > max_gap) | |
too_big_start <- dat$Date[too_big_ind] | |
too_big_end <- dat$Date[too_big_ind + 1] | |
for(gap in 1:length(too_big_start)) { | |
test <- out_dat$x > too_big_start[gap] & out_dat$x < too_big_end[gap] | |
out_dat$y[test] <- NA | |
out_met$cd[test] <- "" | |
} | |
out_data[site] <- out_dat$y | |
out_meta[site] <- out_met$cd | |
} | |
i<-i+1 | |
print(paste(i, length(outfiles))) | |
} | |
attributes <- data.frame(staid = names(out_data)) %>% | |
left_join(st_set_geometry(ref, NULL), by = c("staid" = "STAID")) | |
lats <- attributes$LAT_GAGE | |
lons <- attributes$LNG_GAGE | |
attributes <- select(attributes, | |
station_name = STANAME, | |
da_sqkm = DRAIN_SQKM, | |
hcdn_2009 = HCDN_2009) | |
attributes$hcdn_2009[is.na(attributes$hcdn_2009)] <- "no" | |
ncdfgeom::write_timeseries_dsg("top_nwis_dv.nc", | |
instance_names = names(out_data), | |
lats = lats, lons = lons, | |
times = out_times, | |
data = out_data, | |
data_unit = "ft^3/s", | |
data_prec = "float", | |
data_metadata = list(name = "Discharge", | |
long_name = "Observed daily river discharge in CFS"), | |
overwrite = TRUE) | |
ncdfgeom::write_timeseries_dsg("top_nwis_dv.nc", | |
instance_names = names(out_meta), | |
lats = lats, | |
lons = lons, | |
times = out_times, | |
data = out_meta, | |
data_unit = "none", | |
data_prec = "char", | |
data_metadata = list(name = "quality_flags", | |
long_name = "NWIS quality flags"), | |
add_to_existing = TRUE) | |
ncdfgeom::write_attribute_data("top_nwis_dv.nc", attributes, | |
units = c("none", "km^2", "none")) | |
system("nccopy -k nc4 -d 1 -c instance/1,time/,char/,instance_name_char/,quality_flags_char/ top_nwis_dv.nc top_nwis_dv_2.nc") | |
file.rename("top_nwis_dv_2.nc", "top_nwis_dv.nc") | |
saveRDS(out_data, "out_data.rds") | |
saveRDS(out_meta, "out_meta.rds") | |
saveRDS(out_times, "out_times.rds") | |
##### Testing and exploration ##### | |
nwis_sites <- var.get.nc(nc_nwis, "instance_name") | |
nwm_lookup <- var.get.nc(nc_nwis, "COMID") | |
nwis_time <- utcal.nc(att.get.nc(nc_nwis, "time", "units"), | |
var.get.nc(nc_nwis, "time"), "c") | |
site <- nwis_sites[50] | |
nwis_index <- which(nwis_sites == site) | |
nwis_data <- data.frame(time = nwis_time, | |
q = var.get.nc(nc_nwis, "Discharge", | |
start = c(1, nwis_index), | |
count = c(NA, 1), unpack = TRUE)) | |
has_data <- which(!is.na(nwis_data$q)) | |
start <- nwis_data$time[has_data[1]] | |
end <- nwis_data$time[tail(has_data, 1)] | |
p_nwis <- filter(nwis_data, time > start & time < end) | |
plot(p_nwis$time, p_nwis$q) |
Author
dblodgett-usgs
commented
May 14, 2022
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment