Last active
September 23, 2017 15:54
-
-
Save t-kalinowski/05900156476a0c792623daa39175543b to your computer and use it in GitHub Desktop.
Some helpers for package:units
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# modified from http://adv-r.had.co.nz/dsl.html | |
.wrap_bare_symbols_with_make_unit_call <- function(x) { | |
if (is.atomic(x)) { | |
x | |
} else if (is.name(x)) { | |
chr <- as.character(x) | |
if(!udunits2::ud.is.parseable(chr)) | |
stop("could not parse ", dbl_quote(chr)) | |
bquote(units::make_unit(.(as.character(x)))) | |
} | |
else if (is.call(x) || is.pairlist(x)) { | |
children <- lapply(x[-1], .wrap_bare_symbols_with_make_unit_call) | |
x[-1] <- children | |
x | |
} else { | |
stop("Don't know how to handle type ", typeof(x), | |
call. = FALSE) | |
} | |
} | |
.parse_chr_2_units <- function(chr) { | |
stopifnot(is.character(chr), length(chr) == 1L) | |
unit <- parse(text = chr)[[1]] | |
e <- try(unit <- .wrap_bare_symbols_with_make_unit_call(unit), silent = TRUE) | |
if(inherits(e, "try-error")) { | |
msg <- paste0('in unit "', chr,'", ', attr(e, "condition")$message) | |
return(stop(msg)) | |
} | |
eval(unit, baseenv()) | |
# list2env(list(make_unit = units::make_unit), parent = baseenv())) # #units::ud_units | |
} | |
#' make a unit object | |
#' | |
#' This is an alternative to units::make_unit with the following differences | |
#' \itemize{ | |
#' \item not only unit symobls, but also valid unit names parse correctly | |
#' \item strings of compound units like \code{"ug/l"} parse into correct symyolic_units | |
#' objects, with the denominator correct. This allows for more robust unit | |
#' simplification in complex equations | |
#' \item both expressions and strings are accepted | |
#' \item throws an error if the supplied arguments do not construct a valid unit | |
#' parsable by \code{\link[udunits2]{ud.is.parsable}} | |
#' } | |
#' | |
#' @param x a string, or a bare expression that describes a valid unit. See | |
#' examples for usage | |
#' @param .SEonly perform standard evaluation only. If true, the supplied value | |
#' must be a scalar character, otherwise an error is thrown. | |
#' | |
#' @return an object of class `symbolic_units`, just like [units::make_unit] | |
#' @export | |
#' | |
#' @importFrom rlang enquo quo_text | |
#' @import units | |
#' @seealso units::make_unit .udunits_symbols_info | |
#' | |
#' @noMd | |
#' @examples | |
#' # different ways to specify units | |
#' # all 3 should be identical | |
#' make_unit2(ug/l) | |
#' make_unit2("ug/l") | |
#' string <- "ug/l" | |
#' make_unit2(string) | |
#' | |
#' # valid unit names not found in units::ud_units also parse | |
#' make_unit2(ug/gallon) | |
#' make_unit2("ug/gallon") | |
#' string <- "ug/gallon" | |
#' make_unit2(string) | |
#' | |
#' # the normal evaluation of the supplied argument is attempted first before | |
#' # inspecting supplied expression. The expression is only inspected if it does | |
#' # not resolve to a character vector" | |
#' ug <- "kilogram" | |
#' make_unit2(ug) | |
#' # note that even if one of the symbols is bound, NSE is done on the whole | |
#' # expression or not at all | |
#' make_unit2(ug/l) | |
#' # to avoid expression parsing, use argument .SEonly (e.g., defensive programmig | |
#' # inside a function) | |
#' # make_unit2(ug/l, .SEonly = TRUE) # ERROR | |
#' make_unit2("ug/l", .SEonly = FALSE) # the default | |
#' make_unit2(ug, .SEonly = TRUE) | |
#' | |
#' # some examples for how to convert units | |
#' # first assign units to a numeric, this makes a vector with units | |
#' x <- 1:3 | |
#' (units(x) <- make_unit2(ug/l)) | |
#' # then, assigning units to a vector with units performs conversion | |
#' set_units(x, make_unit2("mg/l")) | |
#' set_units(x, make_unit2("g/l")) | |
#' set_units(x, make_unit2("ug/tbsp")) | |
#' set_units(x, make_unit2(kg/US_liquid_gallon)) | |
#' # `set_units(x, u)` is a pipe friendly equivelant version of `units(x) <- u` | |
#' (units(x) <- make_unit2("mg/l") ) | |
#' units(x) <- make_unit2(ug/l) | |
#' | |
#' # reserved words like in and special characters like % and ' must be | |
#' # backticked if passed as bare expression, however they work just fine if | |
#' # passed as a character string. | |
#' make_unit2(`in`) | |
#' make_unit2("in") | |
#' make_unit2("%/gallon") | |
#' make_unit2("%/'") | |
#' make_unit2("'/%") | |
#' | |
#' # this is commented out because of a documentation difficulty with roxygen | |
#' # throwing errors about mismatched quotes, but this should work | |
#' # make_unit2(`'` / `%`) | |
#' | |
#' | |
#' make_unit2(`%`/gallon) | |
#' | |
#' make_unit2(`%`*T) | |
#' make_unit2(T/F) | |
#' # make_unit2(T/FALSE) # ERROR | |
#' | |
#' # attempting to convert between incompatable units throws an error | |
#' # units(x) <- make_unit2(ft) # ERROR | |
#' # not recognized units throw an error | |
#' # make_unit2(foo/bar) # ERROR | |
make_unit2 <- function(x, .SEonly = FALSE) { | |
require(units) | |
ex <- enquo(x) | |
o <- try(chr <- force(x), silent = TRUE) | |
if (!.SEonly && (!is.character(o) || inherits(o, "try-error"))) | |
chr <- quo_text(ex) | |
stopifnot(is.character(chr), length(chr) == 1L) | |
reserved_word <- | |
"`?(%|'|\"|if|T|F|else|repeat|while|function|for|in|next|break|TRUE|FALSE|NULL|Inf|NaN|NA|NA_integer_|NA_real_|NA_complex_|NA_character_)`?" # removing \\b for now | |
# backtick reserved words and other characters that othwise might throw throw | |
# off parse(text = ) or resolve to something in the baseenv() (e.g., T). Don't | |
# doubleup backtick. | |
# FIXME: currently "kelvin" becomes "kelv`in`". Need to add a boundary check... | |
# simply adding \\b makes % and a few other characters fail though. | |
# | |
# NOTE: there are a bunch of other symbols in ?Syntax that perhaps should also | |
# be included here | |
# | |
# Question: why not just backtick everything? | |
chr <- gsub(reserved_word, "\\`\\1\\`", chr) | |
return(.parse_chr_2_units(chr)) | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# adapted from: | |
# https://github.com/edzer/units/blob/master/R/ud_units.R | |
# https://raw.githubusercontent.com/edzer/units/master/R/ud_units.R | |
# nocov start | |
# This is setup code and all fails if we do not do it and use the units | |
# in the list, so there are no explicit tests for this, thus the nocov | |
.get_ud_xml_dir <- function() { | |
requireNamespace("udunits2") | |
udunits2:::.onAttach() # prints out the filepath to the udunits2.xml database | |
# require(xml2) | |
udunits2_dir <- dirname(Sys.getenv("UDUNITS2_XML_PATH")) | |
if (udunits2_dir == "") | |
udunits2_dir <- "/usr/share/xml/udunits" | |
udunits2_dir | |
} | |
.read_ud_db_symbols <- function(dir, filename) { | |
if (! requireNamespace("xml2", quietly = TRUE)) | |
stop("package xml2 required to create ud_units database") | |
database <- xml2::read_xml(file.path(dir, filename)) | |
symbols <- xml2::xml_find_all(database, ".//symbol") | |
unlist(Map(function(node) as.character(xml2::xml_contents(node)), symbols)) | |
} | |
.read_ud_db_scales <- function(dir, filename) { | |
if (! requireNamespace("xml2", quietly = TRUE)) | |
stop("package xml2 required to create ud_units database") | |
database <- xml2::read_xml(file.path(dir, filename)) | |
symbols <- xml2::xml_find_all(database, ".//value") | |
symbols | |
unlist(Map(function(node) as.numeric(as.character(xml2::xml_contents(node))), symbols)) | |
} | |
.get_ud_symbols <- function() { | |
udunits2_dir <- .get_ud_xml_dir() | |
symbols <- c(.read_ud_db_symbols(udunits2_dir, "udunits2-base.xml"), | |
.read_ud_db_symbols(udunits2_dir, "udunits2-derived.xml"), | |
.read_ud_db_symbols(udunits2_dir, "udunits2-accepted.xml"), | |
.read_ud_db_symbols(udunits2_dir, "udunits2-common.xml")) | |
# symbols = symbols[symbols == make.names(symbols)] | |
## (this would drop "'" "\"" "%" "in") | |
symbols | |
} | |
.get_ud_prefixes <- function() { | |
udunits2_dir <- .get_ud_xml_dir() | |
.read_ud_db_symbols(udunits2_dir, "udunits2-prefixes.xml") | |
} | |
.construct_ud_units <- function(){ | |
ud_prefixes <- .get_ud_prefixes() | |
ud_symbols <- .get_ud_symbols() | |
expand_with_prefixes <- function(symbol) paste(ud_prefixes, symbol, sep = "") | |
symbols <- unique(c(ud_symbols, | |
unlist(Map(expand_with_prefixes, ud_symbols), use.names = FALSE))) | |
ud_units <- Map(make_unit, symbols) | |
names(ud_units) <- symbols | |
ud_units | |
} | |
# Use this to generate the data | |
# ud_units <- .construct_ud_units() | |
# nocov end | |
# ------------ end of copy from | |
# --------- https://raw.githubusercontent.com/edzer/units/master/R/ud_units.R | |
# library(purrr) | |
# library(dplyr) | |
# library(TKutils) | |
`%|%` <- rlang::`%|%` | |
`%empty%` <- function(x, y) if(length(x)==0) y else x | |
.read_ud_db <- function(dir, filename) { | |
if (! requireNamespace("xml2", quietly = TRUE)) | |
stop("package xml2 required to create ud_units database") | |
database <- xml2::read_xml(file.path(dir, filename)) | |
# xml2::as_list(database) | |
database | |
} | |
.db_list_as_dataframe <- function(db) { | |
xml_nodes <- xml_children(db) | |
map_dfr(seq_len(xml_length(db)), function(i) { | |
unit <- xml_nodes[[i]] | |
symbols <- xml_find_all(unit, ".//symbol") | |
symbols <- xml_text(symbols) %empty% "" | |
symbol <- symbols[ 1] | |
symbol_aliases <- pcc(symbols[-1]) | |
unit_names <- xml_find_all(unit, ".//name") | |
all_names <- unlist(map(unit_names, ~xml_text(xml_children(.x)))) | |
singular <- xml_find_all(unit_names, ".//singular") %>% xml_text() | |
plural <- xml_find_all(unit_names, ".//plural") %>% xml_text() | |
name_singular <- singular[ 1] %|% "" | |
name_singular_aliases <- pcc(singular[-1]) %|% "" | |
name_plural <- plural[ 1] %|% "" | |
name_plural_aliases <- pcc(plural[-1]) %|% "" | |
def <- xml_find_all(unit, ".//def") | |
def <- xml_text(def) %empty% "" | |
definition <- xml_find_all(unit, ".//definition") | |
definition <- xml_text(definition) %empty% "" | |
comment <- xml_find_all(unit, ".//comment") | |
comment <- xml_text(comment) %empty% "" | |
dimensionless <- xml_find_all(unit, ".//dimensionless") | |
dimensionless <- as.logical(length(dimensionless)) | |
# all node names that might be in a unit node | |
# db %>% xml_children() %>% map(~xml_children(.x) %>% xml_name()) %>% | |
# unique() %>% unlist() %>% unique() | |
# [1] "base" "name" "symbol" | |
# [4] "aliases" "definition" "def" | |
# [7] "comment" "dimensionless" | |
# rest_xml <- unit %>% xml_children() | |
# rest <- map(rest_xml, xml_text) | |
# names(rest) <- rest_xml %>% xml_name() | |
# rest <- list(rest) | |
tibble(symbol, symbol_aliases, | |
name_singular, name_singular_aliases, | |
name_plural, name_plural_aliases, | |
def, definition, comment, dimensionless) #, rest | |
}) | |
} | |
.get_ud_db_all <- function() { | |
udunits2_dir <- .get_ud_xml_dir() | |
base <- .read_ud_db(udunits2_dir, "udunits2-base.xml") # 7 | |
derv <- .read_ud_db(udunits2_dir, "udunits2-derived.xml") # 23 | |
acpt <- .read_ud_db(udunits2_dir, "udunits2-accepted.xml") # 24 | |
cmon <- .read_ud_db(udunits2_dir, "udunits2-common.xml") # 221 | |
len <- sum(sapply(list(base, derv, acpt, cmon), xml_length)) | |
bind_rows( | |
base = .db_list_as_dataframe(base), | |
derived = .db_list_as_dataframe(derv), | |
accepted = .db_list_as_dataframe(acpt), | |
common = .db_list_as_dataframe(cmon), | |
.id = "source_xml_table_name" | |
) | |
} | |
.get_ud_prefixes_xml <- function() { | |
udunits2_dir <- .get_ud_xml_dir() | |
.read_ud_db(udunits2_dir, "udunits2-prefixes.xml") | |
} | |
#' @name udunits-info | |
#' @export | |
.udunits_prefix_info <- function() { | |
pr <- .get_ud_prefixes_xml() | |
# all prefix valid names | |
# pr %>% xml_children() %>% map(~xml_children(.x) %>% xml_name()) %>% | |
# unlist() %>% unique() | |
# "value" "name" "symbol" | |
pr %>% | |
xml_children() %>% | |
map_dfr(function(prefix) { | |
symbols <- xml_find_all(prefix, ".//symbol") %>% xml_text() | |
symbol <- symbols[1] | |
symbol_aliases <- pcc(symbols[-1]) | |
name <- xml_find_all(prefix, ".//name") %>% xml_text() | |
value <- xml_find_all(prefix, ".//value") %>% xml_double() | |
tibble(symbol, symbol_aliases, name, value) | |
}) | |
} | |
#' Get information on valid units | |
#' | |
#' The returned dataframe is constructed at runtime by reading the xml database | |
#' that powers unit conversion in [package:udunits2]. Inspect this dataframe to | |
#' determine what inputs are accepted to `units::make_unit` or | |
#' `RemTools::make_units2`. Any entry listed as a `name` or `symbol` (including | |
#' alias names and symbols) are accepted. Additionaly, any symbols can also | |
#' contain a valid prefix. | |
#' | |
#' @return a data frame | |
#' @export | |
#' | |
#' @importFrom xml2 xml_children xml_find_all xml_text xml_length xml_double | |
#' @name udunits-info | |
#' @examples | |
#' .udunits_symbols_info() | |
#' .udunits_prefix_info() | |
.udunits_symbols_info <- function() { | |
.get_ud_db_all() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment