Last active
June 23, 2018 18:25
-
-
Save fawda123/a1358a1ac60e5b8a2b9e0d1a3aa4fda2 to your computer and use it in GitHub Desktop.
limits of imputePSF, compared with na.mean on simulated time series
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gridExtra) | |
library(grid) | |
library(imputeTestbench) | |
library(PSF) | |
library(imputePSF) | |
library(imputeTS) | |
library(scales) | |
# total obs in each simulated time series | |
n <- length(nottem) | |
# average of nottem for centering simulated data | |
avenot <- mean(nottem) | |
# create simulated data | |
# wvs is periodic only, nrms is random only, sim is combo of the two | |
simdat <- crossing( | |
mts = seq(0, 1, length = 25), | |
sds = seq(0, 8, length = 25) | |
) %>% | |
group_by(mts, sds) %>% | |
nest %>% | |
mutate( | |
wvs = map(mts, function(mts) ((as.numeric(nottem) - avenot) * mts) + avenot), | |
nrm = map(sds, function(sds) rnorm(n = n, sd = sds)) | |
) %>% | |
select(-data) %>% | |
mutate( | |
sim = pmap(list(wvs, nrm), function(wvs, nrm) wvs + nrm) | |
) | |
# run imputePSF with imputeTestbench, compare with replacement by means | |
simres <- simdat %>% | |
mutate( | |
errs = map(sim, impute_errors, | |
methods = c("impute_PSF", "na.mean"), # "na.approx", "na.locf", "na.mean"), | |
smps = 'mar', missPercentFrom = 30, missPercentTo = 30, blck = 100, | |
errorParameter = 'mape') | |
) | |
# optionally save and reload, simres takes a while | |
save(simres, file = 'simres.RData', compress = 'xz') | |
# load(file = 'simres.RData') | |
## | |
# plot output | |
# example plot for simulated observations, data prep | |
toplo <- simdat %>% | |
filter(mts %in% c(0, 0.5, 1) & sds %in% c(0, 4, 8)) %>% | |
mutate(ind = list(1:n)) %>% | |
unnest | |
# plot | |
p1 <- ggplot(toplo, aes(x = ind, y = sim)) + | |
geom_line(size = 0.25) + | |
geom_point(size = 0.5, alpha = 0.8, colour = 'black') + | |
facet_grid(mts ~ sds) + | |
theme_bw(base_family = 'serif', base_size = 14) + | |
theme( | |
strip.background = element_blank() | |
) + | |
scale_y_continuous('Simulated values') + | |
scale_x_continuous('Index') | |
# plot of imputation differences, data prep | |
pls <- simres %>% | |
mutate( | |
toplo = map(errs, as.data.frame) | |
) %>% | |
select(mts, sds, toplo) %>% | |
unnest %>% | |
select(-Parameter) %>% | |
mutate( | |
errdf = na.mean - impute_PSF, | |
dir = ifelse(errdf > 0, '+', '') | |
) | |
# ggplot base theme | |
pbase <- theme_bw(base_family = 'serif') + | |
theme( | |
panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank(), | |
axis.text.x = element_text(size = 8), | |
axis.text.y = element_text(size = 8), | |
legend.position = 'top', | |
legend.direction = 'horizontal', | |
plot.margin = unit(c(1,1,0,0), "lines"), | |
strip.background = element_blank(), | |
strip.text.y = element_text(angle = 0, hjust = 0, vjust = 0.5), | |
panel.background = element_rect(fill = 'black') | |
) | |
# plot | |
p2 <- ggplot(pls) + | |
geom_tile(aes(y = mts, x = sds, fill = errdf), colour = 'black') + | |
geom_text(aes(y = mts, x = sds, label = dir)) + | |
pbase + | |
scale_y_reverse('Increasing signal', expand = c(0, 0)) + | |
scale_x_continuous('Increasing noise', expand = c(0, 0)) + | |
scale_fill_gradient2('Error difference\n(mean replacement - imputePSF)', low = muted("red"), mid = "white", high = muted("green"), midpoint = 0) + | |
guides(fill = guide_colourbar(barheight = 0.5, barwidth = 8)) | |
gparg <- gpar(fontfamily = 'serif', cex = 1.2) | |
# first plot save as jpeg | |
jpeg('simex.jpg', height = 5, width = 6.5, units = 'in', res = 500, quality = 100) | |
grid.arrange( | |
arrangeGrob( | |
p1, | |
top = textGrob('Increasing noise', gp = gparg), | |
right = textGrob('Increasing periodicity', rot = 270, gp = gparg)) | |
) | |
dev.off() | |
# second plot save as jpeg | |
jpeg('simres.jpg', height = 4, 4.5, units = 'in', res = 500, quality = 100) | |
p2 | |
dev.off() | |
# R version 3.4.4 (2018-03-15) | |
# Platform: x86_64-w64-mingw32/x64 (64-bit) | |
# Running under: Windows >= 8 x64 (build 9200) | |
# | |
# Matrix products: default | |
# | |
# locale: | |
# [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 | |
# [3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C | |
# [5] LC_TIME=English_United States.1252 | |
# | |
# attached base packages: | |
# [1] grid stats graphics grDevices utils datasets methods base | |
# | |
# other attached packages: | |
# [1] bindrcpp_0.2 scales_0.5.0.9000 imputeTS_2.5 imputePSF_0.1.0 PSF_0.4 | |
# [6] imputeTestbench_3.0.1 gridExtra_2.2.1 dplyr_0.7.3 purrr_0.2.3 readr_1.1.1 | |
# [11] tidyr_0.7.1 tibble_1.4.2 ggplot2_2.2.1.9000 tidyverse_1.1.1 | |
# | |
# loaded via a namespace (and not attached): | |
# [1] Rcpp_0.12.16 lubridate_1.6.0 lattice_0.20-35 zoo_1.8-0 digest_0.6.15 | |
# [6] assertthat_0.2.0 lmtest_0.9-35 psych_1.7.5 R6_2.2.2 cellranger_1.1.0 | |
# [11] plyr_1.8.4 httr_1.2.1 pillar_1.2.2 rlang_0.2.0.9001 lazyeval_0.2.1 | |
# [16] readxl_1.0.0 data.table_1.10.4-3 fracdiff_1.4-2 TTR_0.23-1 labeling_0.3 | |
# [21] stringr_1.2.0 foreign_0.8-69 munsell_0.4.3 broom_0.4.2 compiler_3.4.4 | |
# [26] modelr_0.1.0 pkgconfig_2.0.1 mnormt_1.5-5 forecast_8.1 tidyselect_0.2.0 | |
# [31] nnet_7.3-12 quadprog_1.5-5 withr_2.1.2 nlme_3.1-131.1 jsonlite_1.5 | |
# [36] gtable_0.2.0 magrittr_1.5 quantmod_0.4-10 stringi_1.1.5 reshape2_1.4.3 | |
# [41] tseries_0.10-42 timeDate_3012.100 xml2_1.1.1 xts_0.10-0 tools_3.4.4 | |
# [46] forcats_0.2.0 glue_1.1.1 hms_0.3 parallel_3.4.4 yaml_2.1.14 | |
# [51] colorspace_1.3-2 cluster_2.0.6 stinepack_1.3 rvest_0.3.2 bindr_0.1 | |
# [56] haven_1.0.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment