Skip to content

Instantly share code, notes, and snippets.

@dblodgett-usgs
Created May 14, 2022 19:32
Show Gist options
  • Save dblodgett-usgs/e6504a2ecf2d26eb2de580cc3109a9e2 to your computer and use it in GitHub Desktop.
Save dblodgett-usgs/e6504a2ecf2d26eb2de580cc3109a9e2 to your computer and use it in GitHub Desktop.
nwis dv to netcdf-dsg with the ncdfgeom package.
library(sf)
library(dplyr)
library(dataRetrieval)
library(RNetCDF)
# https://water.usgs.gov/GIS/metadata/usgswrd/XML/gagesII_Sept2011.xml
gagesii <- sf::read_sf("R/nexus_locations/gagesII_9322_point_shapefile/gagesII_9322_sept30_2011.shp")
# Just look at reference gages with recent active status and more than 80 years of record.
ref <- dplyr::filter(gagesii, CLASS == "Ref" & ACTIVE09 == "yes" & FLYRS1900 > 80)
i<-1
for(site in ref$STAID) {
file <- paste0("out/", site, ".rds")
if(!file.exists(file)) {
try(
# just builds a cache of data for later.
saveRDS(dataRetrieval::readNWISdv(site, "00060", startDate = "1900-01-01"),
file = file), silent = FALSE)
print(paste(site, i))
}
i <- i + 1
}
outfiles <- list.files("out/")
out_times <- seq(from = as.Date("1900-01-01"),
to = as.Date("2022-05-14 00:00"), by = "day")
out_data <- data.frame(matrix(numeric(0),
nrow = length(out_times),
ncol = length(outfiles)))
out_meta <- data.frame(matrix(character(0),
nrow = length(out_times),
ncol = length(outfiles)), stringsAsFactors = FALSE)
names(out_data) <- names(out_meta) <- gsub(".rds", "", outfiles)
max_gap <- 5
# now we can step through the data and populate our big array.
i <- 0
for(f in outfiles) {
dat <- readRDS(file.path("out/", f))
if(nrow(dat) > 0) {
site <- dat$site_no[1]
out_dat <- approx(dat$Date, dat$X_00060_00003, out_times, rule = 1)
out_met <- data.frame(dateTime = out_times, seq = seq(1, length(out_times)))
out_met_temp <- approx(out_met$dateTime, out_met$seq, dat$Date, method = "constant") %>%
as.data.frame() %>%
left_join(select(dat, dt = Date, cd = X_00060_00003_cd), by = c("x" = "dt")) %>%
group_by(y) %>%
filter(row_number() == 1) %>%
ungroup() %>% select(-x)
out_met <- left_join(out_met, out_met_temp, by = c("seq" = "y")) %>%
select(-seq)
# the method here fills missing record but we need to remove the stuff
# that was interpolated in a large gap.
diff_time <- diff(as.numeric(dat$Date))
too_big_ind <- which(diff_time > max_gap)
too_big_start <- dat$Date[too_big_ind]
too_big_end <- dat$Date[too_big_ind + 1]
for(gap in 1:length(too_big_start)) {
test <- out_dat$x > too_big_start[gap] & out_dat$x < too_big_end[gap]
out_dat$y[test] <- NA
out_met$cd[test] <- ""
}
out_data[site] <- out_dat$y
out_meta[site] <- out_met$cd
}
i<-i+1
print(paste(i, length(outfiles)))
}
attributes <- data.frame(staid = names(out_data)) %>%
left_join(st_set_geometry(ref, NULL), by = c("staid" = "STAID"))
lats <- attributes$LAT_GAGE
lons <- attributes$LNG_GAGE
attributes <- select(attributes,
station_name = STANAME,
da_sqkm = DRAIN_SQKM,
hcdn_2009 = HCDN_2009)
attributes$hcdn_2009[is.na(attributes$hcdn_2009)] <- "no"
ncdfgeom::write_timeseries_dsg("top_nwis_dv.nc",
instance_names = names(out_data),
lats = lats, lons = lons,
times = out_times,
data = out_data,
data_unit = "ft^3/s",
data_prec = "float",
data_metadata = list(name = "Discharge",
long_name = "Observed daily river discharge in CFS"),
overwrite = TRUE)
ncdfgeom::write_timeseries_dsg("top_nwis_dv.nc",
instance_names = names(out_meta),
lats = lats,
lons = lons,
times = out_times,
data = out_meta,
data_unit = "none",
data_prec = "char",
data_metadata = list(name = "quality_flags",
long_name = "NWIS quality flags"),
add_to_existing = TRUE)
ncdfgeom::write_attribute_data("top_nwis_dv.nc", attributes,
units = c("none", "km^2", "none"))
system("nccopy -k nc4 -d 1 -c instance/1,time/,char/,instance_name_char/,quality_flags_char/ top_nwis_dv.nc top_nwis_dv_2.nc")
file.rename("top_nwis_dv_2.nc", "top_nwis_dv.nc")
saveRDS(out_data, "out_data.rds")
saveRDS(out_meta, "out_meta.rds")
saveRDS(out_times, "out_times.rds")
##### Testing and exploration #####
nwis_sites <- var.get.nc(nc_nwis, "instance_name")
nwm_lookup <- var.get.nc(nc_nwis, "COMID")
nwis_time <- utcal.nc(att.get.nc(nc_nwis, "time", "units"),
var.get.nc(nc_nwis, "time"), "c")
site <- nwis_sites[50]
nwis_index <- which(nwis_sites == site)
nwis_data <- data.frame(time = nwis_time,
q = var.get.nc(nc_nwis, "Discharge",
start = c(1, nwis_index),
count = c(NA, 1), unpack = TRUE))
has_data <- which(!is.na(nwis_data$q))
start <- nwis_data$time[has_data[1]]
end <- nwis_data$time[tail(has_data, 1)]
p_nwis <- filter(nwis_data, time > start & time < end)
plot(p_nwis$time, p_nwis$q)
@dblodgett-usgs
Copy link
Author

image

netcdf top_nwis_dv {
dimensions:
        instance = 99 ;
        time = 44694 ;
        instance_name_char = 8 ;
        quality_flags_char = 5 ;
        char = 50 ;
variables:
        char instance_name(instance, instance_name_char) ;
                instance_name:long_name = "Station Names" ;
                instance_name:cf_role = "timeseries_id" ;
                instance_name:_Storage = "chunked" ;
                instance_name:_ChunkSizes = 99, 8 ;
                instance_name:_DeflateLevel = 1 ;
                instance_name:_NoFill = "true" ;
        double time(time) ;
                time:units = "days since 1970-01-01 00:00:00" ;
                time:missing_value = -999. ;
                time:long_name = "time of measurement" ;
                time:standard_name = "time" ;
                time:_Storage = "chunked" ;
                time:_ChunkSizes = 44694 ;
                time:_DeflateLevel = 1 ;
                time:_Endianness = "little" ;
                time:_NoFill = "true" ;
        double lat(instance) ;
                lat:units = "degrees_north" ;
                lat:missing_value = -999. ;
                lat:long_name = "latitude of the measurement" ;
                lat:standard_name = "latitude" ;
                lat:_Storage = "chunked" ;
                lat:_ChunkSizes = 99 ;
                lat:_DeflateLevel = 1 ;
                lat:_Endianness = "little" ;
                lat:_NoFill = "true" ;
        double lon(instance) ;
                lon:units = "degrees_east" ;
                lon:missing_value = -999. ;
                lon:long_name = "longitude of the measurement" ;
                lon:standard_name = "longitude" ;
                lon:_Storage = "chunked" ;
                lon:_ChunkSizes = 99 ;
                lon:_DeflateLevel = 1 ;
                lon:_Endianness = "little" ;
                lon:_NoFill = "true" ;
        float Discharge(instance, time) ;
                Discharge:units = "ft^3/s" ;
                Discharge:missing_value = -2.147484e+09f ;
                Discharge:long_name = "Observed daily river discharge in CFS" ;
                Discharge:coordinates = "time lat lon" ;
                Discharge:_Storage = "chunked" ;
                Discharge:_ChunkSizes = 1, 44694 ;
                Discharge:_DeflateLevel = 1 ;
                Discharge:_Endianness = "little" ;
                Discharge:_NoFill = "true" ;
        char quality_flags(instance, time, quality_flags_char) ;
                quality_flags:units = "none" ;
                quality_flags:long_name = "NWIS quality flags" ;
                quality_flags:coordinates = "time lat lon" ;
                quality_flags:_Storage = "chunked" ;
                quality_flags:_ChunkSizes = 1, 44694, 5 ;
                quality_flags:_DeflateLevel = 1 ;
                quality_flags:_NoFill = "true" ;
        char station_name(instance, char) ;
                station_name:units = "none" ;
                station_name:_Storage = "chunked" ;
                station_name:_ChunkSizes = 99, 50 ;
                station_name:_DeflateLevel = 1 ;
                station_name:_NoFill = "true" ;
        double da_sqkm(instance) ;
                da_sqkm:units = "km^2" ;
                da_sqkm:missing_value = -9999.999 ;
                da_sqkm:_Storage = "chunked" ;
                da_sqkm:_ChunkSizes = 99 ;
                da_sqkm:_DeflateLevel = 1 ;
                da_sqkm:_Endianness = "little" ;
                da_sqkm:_NoFill = "true" ;
        char hcdn_2009(instance, char) ;
                hcdn_2009:units = "none" ;
                hcdn_2009:_Storage = "chunked" ;
                hcdn_2009:_ChunkSizes = 99, 50 ;
                hcdn_2009:_DeflateLevel = 1 ;
                hcdn_2009:_NoFill = "true" ;
        int COMID(instance) ;
                COMID:units = "nhdplusv2.1 comid" ;
                COMID:missing_value = -9999 ;
                COMID:_Storage = "chunked" ;
                COMID:_ChunkSizes = 99 ;
                COMID:_DeflateLevel = 1 ;
                COMID:_Endianness = "little" ;
                COMID:_NoFill = "true" ;

// global attributes:
                :Conventions = "CF-1.8" ;
                :featureType = "timeSeries" ;
                :cdm_data_type = "Station" ;
                :standard_name_vocabulary = "CF-1.8" ;
                :_NCProperties = "version=2,netcdf=4.8.0,hdf5=1.10.6" ;
                :_SuperblockVersion = 2 ;
                :_IsNetcdf4 = 1 ;
                :_Format = "netCDF-4" ;
}

@dblodgett-usgs
Copy link
Author

> top_nwis <- stars::read_ncdf("top_nwis_dv.nc")
Warning message:
In get_nc_list(nc, dsg, nc_meta, read_data) : no altitude coordinate found
> top_nwis
stars object with 2 dimensions and 1 attribute
attribute(s), summary of first 1e+05 cells:
           Min. 1st Qu. Median     Mean 3rd Qu.  Max.  NA's
Discharge     5     191    450 983.0053    1090 31700 17450
dimension(s):
       from    to         offset  delta  refsys point
time      1 44694 1900-01-01 UTC 1 days POSIXct FALSE
points    1    99             NA     NA  WGS 84  TRUE
                                                          values
time                                                        NULL
points POINT (-68.58264 47.23739),...,POINT (-156.5885 20.96208)

loc <- stars::st_dimensions(top_nwis)$points$values
mapview::mapview(loc)

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment