daltare · May 6, 2024 01:31
diff --git a/ceden-data-portal-API_single-year.r b/ceden-data-portal-API_single-year.r
 # This example shows one way to access subsets of CEDEN data via the California open data portal API (it's not 
 # necessarily the only or best way though) - if you find any problems or have questions, please contact: [email protected]

 # This example applies to the following CEDEN datasets on the open data portal:
    # Toxicity: https://data.ca.gov/dataset/surface-water-toxicity-results/resource/bd484e9b-426a-4ba6-ba4d-f5f8ce095836
    # Benthic Macroinvertebrates: https://data.ca.gov/dataset/surface-water-benthic-macroinvertebrate-results/resource/3dfee140-47d5-4e29-99ae-16b9b12a404f
 # (NOTE: other CEDEN datasets are split by year, and cannont be queried across multiple years using this method -- for other CEDEN datasets, see: https://gist.github.com/daltare/2de1517ad1e315c4b1cad01278de96dd)

 # This example assumes that you have your own API key saved in a system environment variable 
 # named: "data_portal_key" (to obtain an API key, create an account at data.ca.gov, then go to your 
 # account page and look for the "API Key" header in the lower left side of the page)

 # NOTE: There appears to be a limit of 32,000 records that can be returned from the API in a single call -- if your API query 
 # is likely to return more records than that limit, you'll need to break it up into multiple calls to the API
 # The function below returns a warning message when this occurs

 # load packages
 library(ckanr)
 library(dplyr)
 library(lubridate)
 library(httr)
 library(jsonlite)
 library(glue)
 library(readr)
 library(purrr)

 # set the option to use standard notation rather than scientific notation
 options(scipen = 999) 


 # create a generic function to run an API query and return the formatted results, given a URL for the query 
 generic_api_query <- function(query_url) {
    # encode URL
    url_encoded <- URLencode(query_url)
    
    # get the data
    query_response <- httr::GET(url_encoded, add_headers(Authorization = Sys.getenv('data_portal_key')))
    query_char <- rawToChar(query_response$content)
    query_content <- jsonlite::fromJSON(query_char)
    resource_records <- query_content$result$records
    
    # get information about the fields included in the dataset (allows for reording the fields to match the original dataset)
    resource_fields <- query_content$result$fields %>% 
        filter(!id %in% c('_id', '_full_text')) %>%  # drop these two fields, which are added by the portal and not actually part of the dataset
        pull(id)
    
    # format the data- convert to tibble, set the data types, and reorder the fields to match the order of the original dataset
    resource_records <- tibble(resource_records) %>% 
        type_convert(na = c("", "NaN")) %>% 
        select(all_of(resource_fields))
    
    # print a warning if the API limit was reached by the query
    if (nrow(resource_records) == 32000) {
        warning('WARNING: the API record limit was reached - it\'s likely that some records matching your query were not returned')
    }
    
    # return the results
    return(resource_records)
 }        


 # create a function to download data from the API, given an input resource ID and optional filters for date, text, and numeric fields
 get_api_data <- function(resource_id, filters_date_start = NA, filters_date_end = NA, filters_text = NA, filters_numeric = NA, filters_numeric_drop_na = NA) {
    # NOTES: any of the filters_... variables can be left as NA to ignore them
        # for date and text filters, enter inputs as named character vectors (multiple name-value pairs can be enterd) -- e.g: c('field_name' = 'query_value')
        # for numeric filters, include the condition (>, <, >=, <=, <>) as part of the value (multiple name-condition pairs can be entered) -- e.g.: c('field_name' = '> query_value')
        # for the filters_numeric_drop_na field, just enter the name of the field to drop missing numeric values from (multiple field names can be entered) -- e.g., c('field_name')
    
    # format the filter for minimum/starting dates
    if (!is.na(filters_date_start[1])) {
        query_date_start <- map2_chr(filters_date_start, names(filters_date_start), ~glue("\"{.y}\" >= '{.x}'")) %>% 
            unname() %>% 
            glue_collapse(sep = ' AND ')
    } else {
        query_date_start <- c()
    }
    
    # format the filters for maximum/ending dates
    if (!is.na(filters_date_end[1])) {
        query_date_end <- map2_chr(filters_date_end, names(filters_date_end), ~glue("\"{.y}\" <= '{.x}'")) %>% 
            unname() %>% 
            glue_collapse(sep = ' AND ')
    } else {
        query_date_end <- c()
    }
    
    # format the filters for text values
    if (!is.na(filters_text[1])) {
        query_text_filter <- map2_chr(filters_text, names(filters_text), ~glue("\"{.y}\" = '{.x}'")) %>% 
            unname() %>% 
            glue_collapse(sep = ' AND ')
    } else {
        query_text_filter <- c()
    }
    
    # format the filters for numeric values
    if (!is.na(filters_numeric[1])) {
        query_numeric_filter <- map2_chr(filters_numeric, names(filters_numeric), ~glue("\"{.y}\" {.x}")) %>% 
            unname() %>% 
            glue_collapse(sep = ' AND ') 
    } else {
        query_numeric_filter <- c()
    }
    
    # format the filters to drop missing/null numeric values
    if (!is.na(filters_numeric_drop_na[1])) {
        query_numeric_drop_na <- map_chr(filters_numeric_drop_na, ~glue("\"{.x}\" <> '{NaN}'")) %>% 
            # unname() %>% 
            glue_collapse(sep = ' AND ') 
    } else {
        query_numeric_drop_na <- c()
    }
    
    # combine all of the different filter types into one statement
    filters_combined <- glue_collapse(c(query_date_start, query_date_end, query_text_filter, query_numeric_filter, query_numeric_drop_na), sep = ' AND ')
    
    # create URL for the API call
    api_url <- glue("https://data.ca.gov/api/3/action/datastore_search_sql?sql=SELECT * from \"{resource_id}\"")
    if (length(filters_combined) > 0) { # if there are additional filters, add them
        api_url <- glue("{api_url} WHERE {filters_combined}")
    }
    
    # submit the API query using the constructed url, and return the resulting data frame
    return(generic_api_query(api_url))
 }
   

 #### EXAMPLE QUERY - GET DATA (using the toxicity dataset) ####
 # make an API call to get some data (in this case, get samples between 2016-07-01 and 2018-06-30,
 # where the analyte name is "Oxygen, Dissolved, Total", the units are "mg/L", and the result value is 
 # greater than 10 and is not missing/null)
 df_toxicity <- get_api_data(resource_id = 'bd484e9b-426a-4ba6-ba4d-f5f8ce095836',
                            filters_date_start = c('SampleDate' = '2016-07-01'),
                            filters_date_end = c('SampleDate' = '2018-06-30'),
                            filters_text = c('Analyte' = 'Oxygen, Dissolved, Total',
                                             'Unit' = 'mg/L'),
                            filters_numeric = c('Result' = '> 10'), 
                            filters_numeric_drop_na = c('Result'))


 #### EXAMPLE QUERY - VERIFY DATA ####
 # verify that records with the correct analyte names and units were returned, and get the number of records
 df_toxicity %>% count(Analyte, Unit)
    # # A tibble: 1 x 3
    #   Analyte                  Unit      n
    #   <chr>                    <chr> <int>
    # 1 Oxygen, Dissolved, Total mg/L   1463

 # Verify that the sample date range is correct
 range(df_toxicity$SampleDate)
    # [1] "2016-07-11 UTC" "2018-06-21 UTC"

 # Verify that the result range is correct
 range(df_toxicity$Result)
    # [1] 10.07 88.00

 # view a summary of the entire dataset
 glimpse(df_toxicity)
    # Rows: 1,463
    # Columns: 88
    # $ Program                     <chr> "BASMAA Regional Monitoring Coalition", "BASMAA Regional M...
    # $ ParentProject               <chr> "BASMAA RMC Monitoring in WY2016", "BASMAA RMC Monitoring ...
    # $ Project                     <chr> "ACCWP Creek Status Monitoring in WY2016", "SCVURPPP Pesti...
    # $ StationName                 <chr> "Sausal at E.22nd", "San Tomas Aquino 250m u/s Mission Col...
    # $ StationCode                 <chr> "204SAU030", "205STQ010", "205STQ010", "205STE021", "205ST...
    # $ SampleDate                  <dttm> 2016-07-11, 2016-07-11, 2016-07-11, 2016-07-11, 2016-07-1...
    # $ CollectionTime              <dttm> 1899-12-30 12:40:00, 1899-12-30 14:30:00, 1899-12-30 14:3...
    # $ LocationCode                <chr> "Midchannel", "Bank", "Bank", "Bank", "Bank", "Midchannel"...
    # $ CollectionDepth             <dbl> 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1...
    # $ UnitCollectionDepth         <chr> "m", "m", "m", "m", "m", "m", "m", "m", "m", "m", "m", "m"...
    # $ SampleTypeCode              <chr> "Grab", "Grab", "Grab", "FieldBLDup_Grab", "Grab", "Grab",...
    # $ CollectionReplicate         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
    # $ LabReplicate                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
    # $ ToxBatch                    <chr> "AMS_071216PP_C1_W_TOX", "KLI_071216_PP_C2_W_TOX", "KLI_07...
    # $ LabSampleID                 <chr> "204SAU030-W-06", "205STQ010-W-01", "205STQ010-W-01", "250...
    # $ MatrixName                  <chr> "samplewater", "samplewater", "samplewater", "samplewater"...
    # $ MethodName                  <chr> "EPA 821/R-02-013", "EPA 821/R-02-013", "EPA 821/R-02-013"...
    # $ ToxTestDurCode              <chr> "7 days", "7 days", "4 days", "4 days", "4 days", "4 days"...
    # $ OrganismName                <chr> "Pimephales promelas", "Pimephales promelas", "Selenastrum...
    # $ Analyte                     <chr> "Oxygen, Dissolved, Total", "Oxygen, Dissolved, Total", "O...
    # $ Unit                        <chr> "mg/L", "mg/L", "mg/L", "mg/L", "mg/L", "mg/L", "mg/L", "m...
    # $ WQSource                    <chr> "overlyingwater", "overlyingwater", "overlyingwater", "ove...
    # $ TimePointName               <chr> "initial", "initial", "final", "initial", "initial", "init...
    # $ Result                      <dbl> 10.2, 12.0, 11.4, 10.1, 10.1, 11.2, 11.0, 11.5, 16.0, 15.0...
    # $ ResultQualCode              <chr> "=", "=", "=", "=", "=", "=", "=", "=", "=", "=", "=", "="...
    # $ ToxResultQACode             <chr> "None", "None", "None", "None", "None", "None", "None", "N...
    # $ QACode                      <chr> "None", "None", "None", "None", "None", "None", "None", "N...
    # $ BatchVerificationCode       <chr> "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR"...
    # $ ComplianceCode              <chr> "NR", "Com", "Com", "Com", "Com", "Com", "Com", "Com", "NR...
    # $ SampleComments              <chr> NA, "Same as 205R01411, sampled 2015", "Same as 205R01411,...
    # $ CollectionComments          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ ToxTestComments             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ TimePointComments           <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ ToxResultComments           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ ToxBatchComments            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ EventCode                   <chr> "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ"...
    # $ ProtocolCode                <chr> "BASMAA_RMC_SOP2016v3", "BASMAA_RMC_SOP2016v3", "BASMAA_RM...
    # $ SampleAgency                <chr> "Applied Marine Sciences, Inc. California", "Santa Clara V...
    # $ GroupSamples                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ CollectionMethodName        <chr> "Water_Grab", "Water_Grab", "Water_Grab", "Water_Grab", "W...
    # $ Latitude                    <dbl> 37.78566, 37.38654, 37.38654, 37.41056, 37.41056, 37.61729...
    # $ Longitude                   <dbl> -122.2242, -121.9688, -121.9688, -122.0689, -122.0689, -12...
    # $ CollectionDeviceDescription <chr> "Not Recorded", "Not Recorded", "Not Recorded", "Not Recor...
    # $ PositionWaterColumn         <chr> "Not Recorded", "Not Recorded", "Not Recorded", "Not Recor...
    # $ TIENarrative                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ Dilution                    <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100...
    # $ ToxPointMethod              <chr> "ToxWQMeasurement", "ToxWQMeasurement", "ToxWQMeasurement"...
    # $ Treatment                   <chr> "None", "None", "None", "None", "None", "None", "None", "N...
    # $ UnitTreatment               <chr> "none", "none", "none", "none", "none", "none", "none", "n...
    # $ TreatmentConcentration      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
    # $ LabAgency                   <chr> "PER", "PER", "PER", "PER", "PER", "PER", "PER", "PER", "P...
    # $ SubmittingAgency            <chr> "AMS-CA", "SCVURPPP", "SCVURPPP", "SCVURPPP", "SCVURPPP", ...
    # $ ToxBatchStartDate           <chr> "Jul 12 2016  2:30PM", "Jul 12 2016 12:00AM", "Jul 12 2016...
    # $ RefToxBatch                 <chr> "25990PPRTPER", "25990PPRTPER", "26019SCRTPER", "26021CHDR...
    # $ OrganismAgeAtTestStart      <chr> "<48 hrs", "<48 hrs", "6 days", "7 days", "7 days", "7 day...
    # $ LabSubmissionCode           <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"...
    # $ OccupationMethod            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Walk ...
    # $ StartingBank                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "LB", ...
    # $ DistanceFromBank            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -88, N...
    # $ UnitDistanceFromBank        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "m", N...
    # $ StreamWidth                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -88, N...
    # $ UnitStreamWidth             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "m", N...
    # $ StationWaterDepth           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -88, N...
    # $ UnitStationWaterDepth       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "m", N...
    # $ HydroMod                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Pipes...
    # $ HydroModLoc                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "US", ...
    # $ LocationDetailWQComments    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "upstr...
    # $ PctControl                  <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ RepCount                    <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ Mean                        <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ StdDev                      <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ StatMethod                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ Probability                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ Alphalevel                  <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ EvalThreshold               <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ MSD                         <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ SigEffectCode               <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ ChannelWidth                <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ UpstreamLength              <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ DownstreamLength            <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ TotalReach                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
    # $ CalculatedValue             <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ PercentEffect               <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
    # $ SampleID                    <chr> "204SAU030-W-06", "205STQ010-W-01", "205STQ010-W-01", "250...
    # $ DW_AnalyteName              <chr> "Oxygen, Dissolved", "Oxygen, Dissolved", "Oxygen, Dissolv...
    # $ DataQuality                 <chr> "Unknown data quality", "Unknown data quality", "Unknown d...
    # $ DataQualityIndicator        <chr> "BatchVerificationCode:NR", "BatchVerificationCode:NR", "B...
    # $ Datum                       <chr> "WGS84", "NAD83", "NAD83", "NAD83", "NAD83", "NAD83", "NAD...
	# This example shows one way to access subsets of CEDEN data via the California open data portal API (it's not
	# necessarily the only or best way though) - if you find any problems or have questions, please contact: [email protected]

	# This example applies to the following CEDEN datasets on the open data portal:
	# Toxicity: https://data.ca.gov/dataset/surface-water-toxicity-results/resource/bd484e9b-426a-4ba6-ba4d-f5f8ce095836
	# Benthic Macroinvertebrates: https://data.ca.gov/dataset/surface-water-benthic-macroinvertebrate-results/resource/3dfee140-47d5-4e29-99ae-16b9b12a404f
	# (NOTE: other CEDEN datasets are split by year, and cannont be queried across multiple years using this method -- for other CEDEN datasets, see: https://gist.github.com/daltare/2de1517ad1e315c4b1cad01278de96dd)

	# This example assumes that you have your own API key saved in a system environment variable
	# named: "data_portal_key" (to obtain an API key, create an account at data.ca.gov, then go to your
	# account page and look for the "API Key" header in the lower left side of the page)

	# NOTE: There appears to be a limit of 32,000 records that can be returned from the API in a single call -- if your API query
	# is likely to return more records than that limit, you'll need to break it up into multiple calls to the API
	# The function below returns a warning message when this occurs

	# load packages
	library(ckanr)
	library(dplyr)
	library(lubridate)
	library(httr)
	library(jsonlite)
	library(glue)
	library(readr)
	library(purrr)

	# set the option to use standard notation rather than scientific notation
	options(scipen = 999)


	# create a generic function to run an API query and return the formatted results, given a URL for the query
	generic_api_query <- function(query_url) {
	# encode URL
	url_encoded <- URLencode(query_url)

	# get the data
	query_response <- httr::GET(url_encoded, add_headers(Authorization = Sys.getenv('data_portal_key')))
	query_char <- rawToChar(query_response$content)
	query_content <- jsonlite::fromJSON(query_char)
	resource_records <- query_content$result$records

	# get information about the fields included in the dataset (allows for reording the fields to match the original dataset)
	resource_fields <- query_content$result$fields %>%
	filter(!id %in% c('_id', '_full_text')) %>% # drop these two fields, which are added by the portal and not actually part of the dataset
	pull(id)

	# format the data- convert to tibble, set the data types, and reorder the fields to match the order of the original dataset
	resource_records <- tibble(resource_records) %>%
	type_convert(na = c("", "NaN")) %>%
	select(all_of(resource_fields))

	# print a warning if the API limit was reached by the query
	if (nrow(resource_records) == 32000) {
	warning('WARNING: the API record limit was reached - it\'s likely that some records matching your query were not returned')
	}

	# return the results
	return(resource_records)
	}


	# create a function to download data from the API, given an input resource ID and optional filters for date, text, and numeric fields
	get_api_data <- function(resource_id, filters_date_start = NA, filters_date_end = NA, filters_text = NA, filters_numeric = NA, filters_numeric_drop_na = NA) {
	# NOTES: any of the filters_... variables can be left as NA to ignore them
	# for date and text filters, enter inputs as named character vectors (multiple name-value pairs can be enterd) -- e.g: c('field_name' = 'query_value')
	# for numeric filters, include the condition (>, <, >=, <=, <>) as part of the value (multiple name-condition pairs can be entered) -- e.g.: c('field_name' = '> query_value')
	# for the filters_numeric_drop_na field, just enter the name of the field to drop missing numeric values from (multiple field names can be entered) -- e.g., c('field_name')

	# format the filter for minimum/starting dates
	if (!is.na(filters_date_start[1])) {
	query_date_start <- map2_chr(filters_date_start, names(filters_date_start), ~glue("\"{.y}\" >= '{.x}'")) %>%
	unname() %>%
	glue_collapse(sep = ' AND ')
	} else {
	query_date_start <- c()
	}

	# format the filters for maximum/ending dates
	if (!is.na(filters_date_end[1])) {
	query_date_end <- map2_chr(filters_date_end, names(filters_date_end), ~glue("\"{.y}\" <= '{.x}'")) %>%
	unname() %>%
	glue_collapse(sep = ' AND ')
	} else {
	query_date_end <- c()
	}

	# format the filters for text values
	if (!is.na(filters_text[1])) {
	query_text_filter <- map2_chr(filters_text, names(filters_text), ~glue("\"{.y}\" = '{.x}'")) %>%
	unname() %>%
	glue_collapse(sep = ' AND ')
	} else {
	query_text_filter <- c()
	}

	# format the filters for numeric values
	if (!is.na(filters_numeric[1])) {
	query_numeric_filter <- map2_chr(filters_numeric, names(filters_numeric), ~glue("\"{.y}\" {.x}")) %>%
	unname() %>%
	glue_collapse(sep = ' AND ')
	} else {
	query_numeric_filter <- c()
	}

	# format the filters to drop missing/null numeric values
	if (!is.na(filters_numeric_drop_na[1])) {
	query_numeric_drop_na <- map_chr(filters_numeric_drop_na, ~glue("\"{.x}\" <> '{NaN}'")) %>%
	# unname() %>%
	glue_collapse(sep = ' AND ')
	} else {
	query_numeric_drop_na <- c()
	}

	# combine all of the different filter types into one statement
	filters_combined <- glue_collapse(c(query_date_start, query_date_end, query_text_filter, query_numeric_filter, query_numeric_drop_na), sep = ' AND ')

	# create URL for the API call
	api_url <- glue("https://data.ca.gov/api/3/action/datastore_search_sql?sql=SELECT * from \"{resource_id}\"")
	if (length(filters_combined) > 0) { # if there are additional filters, add them
	api_url <- glue("{api_url} WHERE {filters_combined}")
	}

	# submit the API query using the constructed url, and return the resulting data frame
	return(generic_api_query(api_url))
	}


	#### EXAMPLE QUERY - GET DATA (using the toxicity dataset) ####
	# make an API call to get some data (in this case, get samples between 2016-07-01 and 2018-06-30,
	# where the analyte name is "Oxygen, Dissolved, Total", the units are "mg/L", and the result value is
	# greater than 10 and is not missing/null)
	df_toxicity <- get_api_data(resource_id = 'bd484e9b-426a-4ba6-ba4d-f5f8ce095836',
	filters_date_start = c('SampleDate' = '2016-07-01'),
	filters_date_end = c('SampleDate' = '2018-06-30'),
	filters_text = c('Analyte' = 'Oxygen, Dissolved, Total',
	'Unit' = 'mg/L'),
	filters_numeric = c('Result' = '> 10'),
	filters_numeric_drop_na = c('Result'))


	#### EXAMPLE QUERY - VERIFY DATA ####
	# verify that records with the correct analyte names and units were returned, and get the number of records
	df_toxicity %>% count(Analyte, Unit)
	# # A tibble: 1 x 3
	# Analyte Unit n
	# <chr> <chr> <int>
	# 1 Oxygen, Dissolved, Total mg/L 1463

	# Verify that the sample date range is correct
	range(df_toxicity$SampleDate)
	# [1] "2016-07-11 UTC" "2018-06-21 UTC"

	# Verify that the result range is correct
	range(df_toxicity$Result)
	# [1] 10.07 88.00

	# view a summary of the entire dataset
	glimpse(df_toxicity)
	# Rows: 1,463
	# Columns: 88
	# $ Program <chr> "BASMAA Regional Monitoring Coalition", "BASMAA Regional M...
	# $ ParentProject <chr> "BASMAA RMC Monitoring in WY2016", "BASMAA RMC Monitoring ...
	# $ Project <chr> "ACCWP Creek Status Monitoring in WY2016", "SCVURPPP Pesti...
	# $ StationName <chr> "Sausal at E.22nd", "San Tomas Aquino 250m u/s Mission Col...
	# $ StationCode <chr> "204SAU030", "205STQ010", "205STQ010", "205STE021", "205ST...
	# $ SampleDate <dttm> 2016-07-11, 2016-07-11, 2016-07-11, 2016-07-11, 2016-07-1...
	# $ CollectionTime <dttm> 1899-12-30 12:40:00, 1899-12-30 14:30:00, 1899-12-30 14:3...
	# $ LocationCode <chr> "Midchannel", "Bank", "Bank", "Bank", "Bank", "Midchannel"...
	# $ CollectionDepth <dbl> 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1...
	# $ UnitCollectionDepth <chr> "m", "m", "m", "m", "m", "m", "m", "m", "m", "m", "m", "m"...
	# $ SampleTypeCode <chr> "Grab", "Grab", "Grab", "FieldBLDup_Grab", "Grab", "Grab",...
	# $ CollectionReplicate <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
	# $ LabReplicate <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
	# $ ToxBatch <chr> "AMS_071216PP_C1_W_TOX", "KLI_071216_PP_C2_W_TOX", "KLI_07...
	# $ LabSampleID <chr> "204SAU030-W-06", "205STQ010-W-01", "205STQ010-W-01", "250...
	# $ MatrixName <chr> "samplewater", "samplewater", "samplewater", "samplewater"...
	# $ MethodName <chr> "EPA 821/R-02-013", "EPA 821/R-02-013", "EPA 821/R-02-013"...
	# $ ToxTestDurCode <chr> "7 days", "7 days", "4 days", "4 days", "4 days", "4 days"...
	# $ OrganismName <chr> "Pimephales promelas", "Pimephales promelas", "Selenastrum...
	# $ Analyte <chr> "Oxygen, Dissolved, Total", "Oxygen, Dissolved, Total", "O...
	# $ Unit <chr> "mg/L", "mg/L", "mg/L", "mg/L", "mg/L", "mg/L", "mg/L", "m...
	# $ WQSource <chr> "overlyingwater", "overlyingwater", "overlyingwater", "ove...
	# $ TimePointName <chr> "initial", "initial", "final", "initial", "initial", "init...
	# $ Result <dbl> 10.2, 12.0, 11.4, 10.1, 10.1, 11.2, 11.0, 11.5, 16.0, 15.0...
	# $ ResultQualCode <chr> "=", "=", "=", "=", "=", "=", "=", "=", "=", "=", "=", "="...
	# $ ToxResultQACode <chr> "None", "None", "None", "None", "None", "None", "None", "N...
	# $ QACode <chr> "None", "None", "None", "None", "None", "None", "None", "N...
	# $ BatchVerificationCode <chr> "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR"...
	# $ ComplianceCode <chr> "NR", "Com", "Com", "Com", "Com", "Com", "Com", "Com", "NR...
	# $ SampleComments <chr> NA, "Same as 205R01411, sampled 2015", "Same as 205R01411,...
	# $ CollectionComments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ ToxTestComments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ TimePointComments <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ ToxResultComments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ ToxBatchComments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ EventCode <chr> "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ", "WQ"...
	# $ ProtocolCode <chr> "BASMAA_RMC_SOP2016v3", "BASMAA_RMC_SOP2016v3", "BASMAA_RM...
	# $ SampleAgency <chr> "Applied Marine Sciences, Inc. California", "Santa Clara V...
	# $ GroupSamples <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ CollectionMethodName <chr> "Water_Grab", "Water_Grab", "Water_Grab", "Water_Grab", "W...
	# $ Latitude <dbl> 37.78566, 37.38654, 37.38654, 37.41056, 37.41056, 37.61729...
	# $ Longitude <dbl> -122.2242, -121.9688, -121.9688, -122.0689, -122.0689, -12...
	# $ CollectionDeviceDescription <chr> "Not Recorded", "Not Recorded", "Not Recorded", "Not Recor...
	# $ PositionWaterColumn <chr> "Not Recorded", "Not Recorded", "Not Recorded", "Not Recor...
	# $ TIENarrative <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ Dilution <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100...
	# $ ToxPointMethod <chr> "ToxWQMeasurement", "ToxWQMeasurement", "ToxWQMeasurement"...
	# $ Treatment <chr> "None", "None", "None", "None", "None", "None", "None", "N...
	# $ UnitTreatment <chr> "none", "none", "none", "none", "none", "none", "none", "n...
	# $ TreatmentConcentration <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
	# $ LabAgency <chr> "PER", "PER", "PER", "PER", "PER", "PER", "PER", "PER", "P...
	# $ SubmittingAgency <chr> "AMS-CA", "SCVURPPP", "SCVURPPP", "SCVURPPP", "SCVURPPP", ...
	# $ ToxBatchStartDate <chr> "Jul 12 2016 2:30PM", "Jul 12 2016 12:00AM", "Jul 12 2016...
	# $ RefToxBatch <chr> "25990PPRTPER", "25990PPRTPER", "26019SCRTPER", "26021CHDR...
	# $ OrganismAgeAtTestStart <chr> "<48 hrs", "<48 hrs", "6 days", "7 days", "7 days", "7 day...
	# $ LabSubmissionCode <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"...
	# $ OccupationMethod <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Walk ...
	# $ StartingBank <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "LB", ...
	# $ DistanceFromBank <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -88, N...
	# $ UnitDistanceFromBank <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "m", N...
	# $ StreamWidth <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -88, N...
	# $ UnitStreamWidth <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "m", N...
	# $ StationWaterDepth <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -88, N...
	# $ UnitStationWaterDepth <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "m", N...
	# $ HydroMod <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Pipes...
	# $ HydroModLoc <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "US", ...
	# $ LocationDetailWQComments <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "upstr...
	# $ PctControl <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ RepCount <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ Mean <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ StdDev <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ StatMethod <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ Probability <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ Alphalevel <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ EvalThreshold <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ MSD <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ SigEffectCode <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ ChannelWidth <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ UpstreamLength <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ DownstreamLength <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ TotalReach <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
	# $ CalculatedValue <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ PercentEffect <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN...
	# $ SampleID <chr> "204SAU030-W-06", "205STQ010-W-01", "205STQ010-W-01", "250...
	# $ DW_AnalyteName <chr> "Oxygen, Dissolved", "Oxygen, Dissolved", "Oxygen, Dissolv...
	# $ DataQuality <chr> "Unknown data quality", "Unknown data quality", "Unknown d...
	# $ DataQualityIndicator <chr> "BatchVerificationCode:NR", "BatchVerificationCode:NR", "B...
	# $ Datum <chr> "WGS84", "NAD83", "NAD83", "NAD83", "NAD83", "NAD83", "NAD...