Last active
June 24, 2024 07:45
-
-
Save danlewer/41e382689ec3b95c77e862df5607384a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(lme4) # for ML/REML fitting of mixed models | |
# ---------------------------------------------------------------------------------------------- | |
# simulate a clustered dataset with cluster-level outcome affected by an individual "confounder" | |
# ---------------------------------------------------------------------------------------------- | |
# inputs | |
# ------ | |
# sample size | |
clusters <- 20 | |
cluster_size_range <- floor(exp(100:400/400) ^ 4 + 10) - 2 | |
# model parameters | |
intercept <- 27 | |
risk_factor_B <- 1.7 | |
sd_re <- 5 # standard deviation in random intercepts | |
# simulate data (note arbitrary variances) | |
# ---------------------------------------- | |
per_cluster <- sample(cluster_size_range, clusters, T) | |
N <- sum(per_cluster) | |
cluster_effect <- rnorm(clusters, 0, sd_re) | |
risk_factor_cmean <- rnorm(clusters, 10, 3) | |
d <- data.frame(id = rep(seq_len(clusters), per_cluster), | |
cluster_effect = rep(cluster_effect, per_cluster), | |
risk_factor_cmean = rep(risk_factor_cmean, per_cluster)) | |
d$risk_factor <- d$risk_factor_cmean + rnorm(N, 0, 3) | |
d$target <- intercept + d$cluster_effect + d$risk_factor * risk_factor_B | |
d$outcome <- rnorm(N, d$target, 4) | |
d[, c('cluster_effect', 'risk_factor_cmean', 'target')] <- list(NULL) | |
# visualise exposure & outcome | |
# ---------------------------- | |
par(mfrow = c(1, 2)) | |
plot(d$risk_factor, d$outcome, main = 'Individual level') | |
plot(x = tapply(d$risk_factor, d$id, mean), | |
y = tapply(d$outcome, d$id, mean), | |
main = 'Cluster level', xlab = 'Cluster risk factor mean', ylab = 'Cluster outcome mean') # cluster level | |
# -------------------------------- | |
# fit model with random intercepts | |
# -------------------------------- | |
m <- lmer(outcome ~ risk_factor + (1|id), data = d) | |
# ----------------------- | |
# calculate cluster means | |
# ----------------------- | |
# crude outcome | |
# ------------- | |
means <- tapply(d$outcome, d$id, function (x) c(mean(x), t.test(x)$conf.int[1:2])) | |
means <- do.call(rbind, means) | |
colnames(means) <- c('crude', 'crude_lower', 'crude_upper') | |
# risk factor | |
# ----------- | |
means <- cbind(data.frame(risk_factor = tapply(d$risk_factor, d$id, mean)), means) | |
# bootstrap confidence intervals for standardised outcomes | |
# -------------------------------------------------------- | |
# simple example of bootMer function | |
fboot <- function (.) fixef(.)[2] # extract fixed effect beta for risk factor from model | |
fboot(m) # beta | |
B <- bootMer(m, fboot, nsim = 500) # bootstraps of beta | |
c(fboot(m), confint(B)) # confidence intervals using the percentile method | |
confint(m) # compare confint.merMod, which defaults to the profile method | |
# bootstrap confidence intervals for standardised outcome means (this takes a while) | |
mci <- function (cluster = 1, B = 500) { | |
nd <- data.frame(risk_factor = d$risk_factor, id = cluster) | |
fboot <- function (.) mean(predict(., newdata = nd)) | |
r <- bootMer(m, fboot, nsim = B, use.u = T) | |
c(r$t0, confint(r, type = 'perc')) # defaults to the percentile method. the choice of method is non trivial | |
} | |
bootRes <- sapply(seq_len(clusters), function (x) { | |
print (x) | |
mci (x) | |
}) | |
bootRes <- `colnames<-`(t(bootRes), c('bootPoint', 'bootLower', 'bootUpper')) | |
means <- cbind(means, bootRes) | |
# compare risk factor and effect on standardised outcome (expect lower than average risk factor to cause reduction in standardised estimate and vice versa) | |
par(mfrow = c(1, 1)) | |
with(means, plot(risk_factor, crude - bootPoint, xlab = 'Mean risk factor', ylab = 'change from crude to standardised outcome')) | |
# ------------------- | |
# order by crude mean | |
# ------------------- | |
means <- means[order(means$crude),] | |
# -------------------------------------- | |
# visualise crude and standardised means | |
# -------------------------------------- | |
means$y <- 1:nrow(means) | |
jit <- 0.1 | |
cols <- c('red', 'blue') | |
plot(1, type = 'n', xlim = range(means[, 2:7]), ylim = c(0, clusters), xlab = 'Cluster mean', ylab = 'Cluster') | |
with(means, { | |
points(crude, y-jit, pch = 19, col = cols[1]) | |
arrows(crude_lower, y-jit, crude_upper, y-jit, code = 3, angle = 90, length = 0.05, col = cols[1]) | |
points(bootPoint, y+jit, pch = 19, col = cols[2]) | |
arrows(bootLower, y+jit, bootUpper, y+jit, code = 3, angle = 90, length = 0.05, col = cols[2]) | |
}) | |
abline(v = mean(d$outcome)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment