Last active
March 23, 2021 18:09
-
-
Save SachaEpskamp/9e9aee4be51dcfa425f70d9b608d55d0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script contains some functions to automate things: | |
cat("THIS FUNCTION IS OUTDATED, PLEASE SEE https://github.com/SachaEpskamp/RIVMgrowth") | |
# Function to automize dummy encoding: | |
rivm_lgc_dummy <- function( | |
data, # Dataset | |
design, # Design matrix, as in psychonetrics | |
type = c("non-linear","linear"), # Type of analysis to do | |
predictor_var, # Variable name of predictor (only one supported at the moment) | |
time # time vector encoding distances in time | |
){ | |
library("dplyr") | |
# Check data: | |
stopifnot(is.data.frame(data)) | |
# Check design matrix: | |
allVars <- unlist(design) | |
allVars <- allVars[!is.na(allVars)] | |
if (!all(allVars %in% names(data))){ | |
stop("Not all names in the design matrix correspond to variables in the dataset.") | |
} | |
# Extract from design: | |
if (!is.matrix(design)){ | |
design <- t(design) | |
} | |
nVar <- nrow(design) | |
nWave <- ncol(design) | |
# Names of design: | |
if (is.null(rownames(design))){ | |
rownames(design) <- paste0("Var_",seq_len(nVar)) | |
} | |
variables <- rownames(design) | |
# Time vector: | |
if (missing(time)){ | |
time <- seq_len(nWave) | |
} | |
# Type of analysis: | |
type <- match.arg(type) | |
# Check if predictor is present: | |
use_predictor <- !missing(predictor_var) | |
# Setup dummy: | |
if (use_predictor){ | |
# Check data: | |
if (!predictor_var %in% names(data)){ | |
stop("predictor_var does not correspond to a variable in the dataset") | |
} | |
# Make factor if needed: | |
if (!is.factor(data[[predictor_var]])){ | |
data[[predictor_var]] <- as.factor(data[[predictor_var]]) | |
} | |
# Make dummy variables: | |
predictor_levels <- levels(data[[predictor_var]]) | |
nlevels_predictor <- length(predictor_levels) | |
nDummy <- nlevels_predictor-1 | |
# Dummy coding: | |
dummyvars <- paste0("DUMMY_",seq_len(nDummy)) | |
for (d in seq_len(nDummy)){ | |
data[[dummyvars[[d]]]] <- 1* (data[[predictor_var]] == predictor_levels[d+1]) | |
} | |
} else { | |
nDummy <- 0 | |
dummyvars <- character(0) | |
} | |
# Names of parameters needed in the model: | |
intercepts <- paste0("int_",variables) | |
slopes <- paste0("slope_",variables) | |
residual <- paste0("resid_",variables) | |
# Slope parameters: | |
if (type == "non-linear"){ | |
slope_pars <- lapply(variables,function(x){ | |
c(0,1,paste0("slope_",x,"_",3:nWave)) | |
}) | |
names(slope_pars) <- variables | |
} else { | |
slope_pars <- lapply(variables,function(x){ | |
time - time[1] | |
}) | |
names(slope_pars) <- variables | |
} | |
# Intercept mean parameters: | |
int_mean_pars <- lapply(variables,function(x){ | |
paste0("intercept_model_",x,"_",seq_len(nDummy+1)-1) | |
}) | |
names(int_mean_pars) <- variables | |
# Slope mean parameters: | |
slope_mean_pars <- lapply(variables,function(x){ | |
paste0("slope_model_",x,"_",seq_len(nDummy+1)-1) | |
}) | |
names(slope_mean_pars) <- variables | |
# Start forming the model: | |
mod <- character(0) | |
cur_line <- 0 | |
# loop over variables: | |
for (v in seq_len(nVar)){ | |
# Variables from the design matrix: | |
curvars <- design[v,] | |
# Which are not NA? | |
incl <- !is.na(curvars) | |
inclvars <- which(incl) | |
# Cut out NA: | |
curvars <- curvars[!is.na(curvars)] | |
# Update the line: | |
cur_line <- cur_line + 2 | |
# Header: | |
mod[cur_line] <- paste0("# Intercepts for variable ",variables[v]) | |
# Update the line: | |
cur_line <- cur_line + 1 | |
# Write intercepts: | |
mod[cur_line] <- paste0(intercepts[v], " =~ ", paste0("1 * ",curvars, collapse = " + ")) | |
# Update the line: | |
cur_line <- cur_line + 1 | |
# Now model the intercept: | |
mod[cur_line] <- paste0(intercepts[v], " ~ ", paste0(int_mean_pars[[v]],"*",c("1", dummyvars), collapse = " + ")) | |
# Update the line: | |
cur_line <- cur_line + 2 | |
# Header: | |
mod[cur_line] <- paste0("# Slopes for variable ",variables[v]) | |
# Update the line: | |
cur_line <- cur_line + 1 | |
# Write slopes: | |
mod[cur_line] <- paste0(slopes[v], " =~ ", paste0(slope_pars[[v]][incl]," * ",curvars, collapse = " + ")) | |
# Update the line: | |
cur_line <- cur_line + 1 | |
# Now model the slopes: | |
mod[cur_line] <- paste0(slopes[v], " ~ ", paste0(slope_mean_pars[[v]],"*",c("1", dummyvars), collapse = " + ")) | |
# Update the line: | |
cur_line <- cur_line + 2 | |
# Header: | |
mod[cur_line] <- paste0("# Residuals for variable ",variables[v]) | |
# Write residuals: | |
for (i in seq_along(curvars)){ | |
# Update the line: | |
cur_line <- cur_line + 1 | |
mod[cur_line] <- paste0(curvars[i], " ~~ ", residual[v]," * ",curvars[i]) | |
} | |
} | |
# Now add all expected values of all groups at all waves for all variables: | |
if (use_predictor){ | |
ev_df <- expand.grid( | |
predictor = seq_along(predictor_levels), | |
wave = seq_len(nWave), | |
var = variables, | |
stringsAsFactors = FALSE | |
) | |
names(ev_df) <- c("predictor","wave","var") | |
# Add dummies: | |
for (d in seq_len(nDummy)){ | |
ev_df[[dummyvars[[d]]]] <- 1* (predictor_levels[ev_df$predictor] == predictor_levels[d+1]) | |
} | |
# Parameter name: | |
ev_df$par <- paste0(ev_df$var,"_wave",ev_df$wave,"_",ev_df$predictor) | |
} else { | |
ev_df <- expand.grid( | |
wave = seq_len(nWave), | |
var = variables, | |
stringsAsFactors = FALSE | |
) | |
# Parameter name: | |
ev_df$par <- paste0(ev_df$var,"_wave",ev_df$wave) | |
} | |
# Update the line: | |
cur_line <- cur_line + 2 | |
# Header: | |
mod[cur_line] <- "# Dummy encoding" | |
# Loop over these to add: | |
for (i in seq_len(nrow(ev_df))){ | |
var <- ev_df$var[i] | |
wave <- ev_df$wave[i] | |
dummies <- unlist(ev_df[i,dummyvars]) | |
# Current parameter: | |
curpar <- ev_df$par[i] | |
# Update the line: | |
cur_line <- cur_line + 1 | |
# Write the line: | |
mod[cur_line] <- paste0( | |
curpar, " := ", paste0( | |
int_mean_pars[[var]], "*", c("1",dummies), | |
collapse = " + " | |
), " + ", | |
paste0( | |
slope_mean_pars[[var]], "*", c("1",dummies), " * ", slope_pars[[v]][wave], | |
collapse = " + " | |
) | |
) | |
} | |
# Fix empty lines: | |
mod[is.na(mod)] <- "" | |
# Aggregate: | |
model <- paste0(mod, collapse = "\n") | |
# Run lavaan model: | |
fit <- growth(model, data, missing = "fiml") | |
# Connect the parameter estimates to the expected value table: | |
parTable <- parameterEstimates(fit, standardized = TRUE) | |
# Join with the expected values: | |
ev_df <- ev_df %>% left_join(parTable, by = c("par" = "label")) | |
# Add time: | |
ev_df$time_of_wave <- time[ev_df$wave] | |
# Fix predictor: | |
if (use_predictor){ | |
ev_df$predictor <- predictor_levels[ev_df$predictor] | |
} | |
# Select columns: | |
if (use_predictor){ | |
ev_df <- ev_df[,c("var","wave","time_of_wave","predictor","est","se","ci.lower","ci.upper")] | |
} else { | |
ev_df <- ev_df[,c("var","wave","time_of_wave","est","se","ci.lower","ci.upper")] | |
} | |
# Significance of the regressions: | |
reg_df <- parTable %>% filter(op %in% c("~1", "~"), lhs %in% c(intercepts, slopes)) %>% | |
select(-label) | |
# Correlations between slopes: | |
cor_df <- parTable %>% filter(op == "~~", lhs %in% c(intercepts, slopes), rhs %in% c(intercepts, slopes)) %>% | |
select(lhs, op, rhs, est, se, z, pvalue, ci.lower, ci.upper, std.all) | |
# Make list: | |
results <- list( | |
lavresult = fit, | |
expected_values = ev_df, | |
regressions = reg_df, | |
correlations = cor_df, | |
time = time, | |
use_predictor = use_predictor | |
) | |
class(results) <- "rivm_lgc_dummy" | |
# Return: | |
return(results) | |
} | |
# Print method: | |
print.rivm_lgc_dummy <- function(x){ | |
cat("=== RIVM Latent Growth Curve Analysis ===","\n\n") | |
cat("Lavaan fit result: \n") | |
print(x$lavresult) | |
cat("\n\nLavaan fit measures: \n") | |
fit <- round(fitMeasures(x$lavresult)[c("chisq", "df", "pvalue", "cfi", "tli", "nnfi", "rfi", | |
"nfi", "pnfi", "ifi", "rni", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", | |
"rmsea.pvalue", "srmr", "gfi", "agfi")], 2) | |
df <- data.frame( | |
index = names(fit), | |
value = fit | |
) | |
rownames(df) <- NULL | |
print(df, digits = 2) | |
cat("\n\nRegression coefficients: \n") | |
print(x$regressions, digits = 2) | |
cat("\n\n(co)variance and correlation coefficients: \n") | |
print(x$correlations, digits = 2) | |
cat("\n\nExpected values: \n") | |
print(x$expected_values, digits = 2) | |
cat("\n\n=========================================") | |
} | |
# Plot method: | |
plot.rivm_lgc_dummy <- function(x, predictor_label = "Predictor", wave_label = "Wave "){ | |
library("ggplot2") | |
if (x$use_predictor){ | |
p <- ggplot(x$expected_values, | |
# With aesthetics: | |
aes( | |
x = time_of_wave, # This is the variable that enocdes the time of the wave. | |
y = est, # The estimate | |
ymin = ci.lower, # If we want CIs | |
ymax = ci.upper, # If we want CIs | |
colour = predictor, | |
fill = predictor | |
)) + | |
# Nicer legend (and colorblind theme): | |
scale_colour_manual(predictor_label, | |
values = qgraph:::colorblind(length(unique(x$expected_values$predictor)))) | |
} else { | |
p <- ggplot(x$expected_values, | |
# With aesthetics: | |
aes( | |
x = time_of_wave, # This is the variable that enocdes the time of the wave. | |
y = est, # The estimate | |
ymin = ci.lower, # If we want CIs | |
ymax = ci.upper # If we want CIs | |
)) | |
} | |
p <- p + | |
# Seperate plot per variable: | |
facet_grid(var ~ ., scales = "free_y") + | |
# Add confidence region (uncomment to remove): | |
geom_ribbon(alpha = 0.1, show.legend = FALSE, colour = NA) + | |
# Add lines: | |
geom_line(lwd=1.2) + | |
# Add points: | |
geom_point(cex = 3, pch = 21, fill = "white", stroke = 1.5) + | |
# Nicer theme: | |
theme_bw() + | |
# Set x axis scale to have waves instead of numbers: | |
scale_x_continuous(breaks = sort(unique(x$expected_values$time_of_wave)), | |
labels = paste0(wave_label," ",sort(unique(x$expected_values$wave)))) + | |
# No labels: | |
xlab("") + | |
ylab("") + | |
# Larger fonts: | |
theme(text = element_text(size = 20), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) | |
return(p) | |
} | |
# Correlation plot: | |
corplot <- function(x, legend.position = "topright"){ | |
library("dplyr") | |
library("qgraph") | |
cors <- x$correlations | |
cors <- cors %>% filter(lhs != rhs) %>% select( | |
lhs = lhs, rhs = rhs, weight = std.all, pvalue = pvalue | |
) %>% | |
mutate( | |
lhs = gsub("\\_","\n",lhs), | |
rhs = gsub("\\_","\n",rhs) | |
) %>% mutate( | |
lhs = gsub("int\n","intercept\n",lhs), | |
rhs = gsub("int\n","intercept\n",rhs) | |
) | |
cors$lty <- 3 | |
cors$lty[cors$pvalue < 0.05] <- 2 | |
cors$lty[cors$pvalue < 0.01] <- 1 | |
qgraph(cors[,1:3], directed = FALSE, theme = "colorblind", | |
edge.labels = TRUE, lty = cors$lty, vsize = 14) | |
legend(legend.position, lty = c(1,2,3),legend = c("p < 0.01","p < 0.05","p > 0.05"), | |
bty = "n", cex = 1.5, lwd = 3) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment