Last active
April 30, 2024 20:09
-
-
Save gavinsimpson/8a0f0e072b095295cf5f7af2762e05a7 to your computer and use it in GitHub Desktop.
A quick R script I knocked up to compare the glmmTMB and mgcv packages for fitting zero-inflated GLMMs to the Salamander and Owls data sets from Brooks et al (2017)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Compare Brooks et al glmmTMB paper with mgcv | |
## Packages | |
library("glmmTMB") | |
library("mgcv") | |
library("ggplot2") | |
theme_set(theme_bw()) | |
library("ggstance") | |
## Salamander | |
data(Salamanders, package = "glmmTMB") | |
## Poisson Models | |
pgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML") | |
pgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML") | |
pgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = poisson, method = "ML") | |
pm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = poisson) | |
pm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = poisson) | |
pm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = poisson) | |
AIC(pgam0, pgam1, pgam2) | |
AIC(pm0, pm1, pm2) | |
## Negative binomial models | |
nbgam0 <- gam(count ~ spp + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML") | |
nbgam1 <- gam(count ~ spp + mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML") | |
nbgam2 <- gam(count ~ spp * mined + s(site, bs = "re"), data = Salamanders, family = nb, method = "ML") | |
nbm0 <- glmmTMB(count ~ spp + (1 | site), data = Salamanders, family = nbinom2) | |
nbm1 <- glmmTMB(count ~ spp + mined + (1 | site), data = Salamanders, family = nbinom2) | |
nbm2 <- glmmTMB(count ~ spp * mined + (1 | site), data = Salamanders, family = nbinom2) | |
AIC(nbgam0, nbgam1, nbgam2) | |
AIC(nbm0, nbm1, nbm2) | |
## Zero-inflated Poisson | |
## mgcv's ziplss can only fit using REML | |
zipgam0 <- gam(list(count ~ spp + s(site, bs = "re"), ~ spp), | |
data = Salamanders, family = ziplss, method = "REML") | |
zipgam1 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp), | |
data = Salamanders, family = ziplss, method = "REML") | |
zipgam2 <- gam(list(count ~ spp + mined + s(site, bs = "re"), ~ spp + mined), | |
data = Salamanders, family = ziplss, method = "REML") | |
zipgam3 <- gam(list(count ~ spp * mined + s(site, bs = "re"), ~ spp * mined), | |
data = Salamanders, family = ziplss, method = "REML") | |
## check the things converged | |
zipgam0$outer.info | |
zipgam1$outer.info | |
zipgam2$outer.info | |
zipgam3$outer.info | |
zipm0 <- glmmTMB(count ~ spp + (1 | site), zi = ~ spp, data = Salamanders, family = poisson) | |
zipm1 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp, data = Salamanders, family = poisson) | |
zipm2 <- glmmTMB(count ~ spp + mined + (1 | site), zi = ~ spp + mined, data = Salamanders, family = poisson) | |
zipm3 <- glmmTMB(count ~ spp * mined + (1 | site), zi = ~ spp * mined, data = Salamanders, family = poisson) | |
AIC(zipgam0, zipgam1, zipgam2, zipgam3) | |
AIC(zipm0, zipm1, zipm2, zipm3) | |
## Newdata | |
newd0 <- newd <- as.data.frame(cbind(unique(Salamanders[, c("mined","spp")]), site = "R -1")) | |
rownames(newd0) <- rownames(newd) <- NULL | |
pred <- predict(zipgam3, newd, exclude = "s(site)", type = "link") | |
beta <- coef(zipgam3) | |
consts <- beta[grep("Intercept", names(beta))] | |
ilink <- function(eta) { | |
## from stats::binomial(link = cloglog)$linkinv | |
pmax(pmin(-expm1(-exp(eta)), 1 - .Machine$double.eps), .Machine$double.eps) | |
} | |
newd <- transform(newd, fitted = exp(pred[,1]) * ilink(pred[,2])) | |
ggplot(newd, aes(x = spp, y = fitted, colour = mined)) + | |
geom_point() | |
## Owls | |
data(Owls, package = "glmmTMB") | |
names(Owls) <- sub("SiblingNegotiation", "NCalls", names(Owls)) | |
Owls <- transform(Owls, cArrivalTime = ArrivalTime - mean(ArrivalTime)) | |
### constant zero-inflation | |
system.time({m1.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest), | |
ziformula = ~ 1, data = Owls, family = poisson)}) | |
## mgcv's ziplss can only fit using REML | |
system.time({m1.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"), | |
~ 1), data = Owls, family = ziplss(), method = "REML")}) | |
createCoeftab <- function(TMB, GAM) { | |
bTMB <- fixef(TMB)$cond[-1] | |
bGAM <- coef(GAM)[2:6] | |
seTMB <- diag(vcov(TMB)$cond)[-1] | |
seGAM <- diag(vcov(GAM))[2:6] | |
nms <- names(bTMB) | |
nms <- sub("FoodTreatment", "FT", nms) | |
nms <- sub("cArrivalTime", "ArrivalTime", nms) | |
df <- data.frame(model = rep(c("glmmTMB", "mgcv::gam"), each = 5), | |
term = rep(nms, 2), | |
estimate = unname(c(bTMB, bGAM))) | |
df <- transform(df, | |
upper = estimate + sqrt(c(seTMB, seGAM)), | |
lower = estimate - sqrt(c(seTMB, seGAM))) | |
df | |
} | |
m1.coefs <- createCoeftab(m1.tmb, m1.gam) | |
p1 <- ggplot(m1.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) + | |
geom_pointrangeh(position = position_dodgev(height = 0.3)) + | |
labs(y = NULL, | |
x = "Regression estimate", | |
title = "Comparing mgcv with glmmTMB", | |
subtitle = "Owls: ZIP with constant zero-inflation", | |
caption = "Bars are ±1 SE") | |
### Complex zero-inflation | |
system.time({m2.tmb <- glmmTMB(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + (1 | Nest), | |
ziformula = ~ FoodTreatment + (1 | Nest), data = Owls, family = poisson)}) | |
## mgcv's ziplss can only fit using REML | |
system.time({m2.gam <- gam(list(NCalls ~ (FoodTreatment + cArrivalTime) * SexParent + offset(logBroodSize) + s(Nest, bs = "re"), | |
~ FoodTreatment + s(Nest, bs = "re")), data = Owls, family = ziplss(), method = "REML")}) | |
m2.coefs <- createCoeftab(m2.tmb, m2.gam) | |
p2 <- ggplot(m2.coefs, aes(x = estimate, y = term, colour = model, shape = model, xmax = upper, xmin = lower)) + | |
geom_pointrangeh(position = position_dodgev(height = 0.3)) + | |
labs(y = NULL, | |
x = "Regression estimate", | |
title = "Comparing mgcv with glmmTMB", | |
subtitle = "Owls: ZIP with complex zero-inflation", | |
caption = "Bars are ±1 SE") | |
ggsave("~/Downloads/owls-comparison-simple-zip.png", p1, width = 7, height = 7, dpi = 150) | |
ggsave("~/Downloads/owls-comparison-complex-zip.png", p2, width = 7, height = 7, dpi = 150) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for letting us know about this. I'll add
mgcv
to the next draft of the manuscript.I added
mgcv
to the negative binomial timing comparisons. It's definitely faster for the original data structure, but not with many more random effect levels. I'm guessing thatTMB
's automatic sparseness detection gives it the advantage.