Last active
September 26, 2022 09:45
-
-
Save strengejacke/eb607b015b8041f378d56e844d5d38cf to your computer and use it in GitHub Desktop.
Example showing different ways of modelling ordinal predictors, and how their model summaries and predictions look like.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggeffects) | |
library(parameters) | |
library(brms) | |
income_options <- c("below_20", "20_to_40", "40_to_100", "greater_100") | |
income <- factor(sample(income_options, 100, TRUE), | |
levels = income_options, ordered = TRUE) | |
mean_ls <- c(30, 60, 70, 75) | |
ls <- mean_ls[income] + rnorm(100, sd = 7) | |
dat <- data.frame(income, ls) | |
dat$income2 <- dat$income | |
contrasts(dat$income2) <- MASS::contr.sdif | |
dat$income3 <- factor(dat$income, ordered = FALSE) | |
dat$income_n <- as.numeric(dat$income) | |
# fit a simple monotonic model | |
fit1 <- brm(ls ~ mo(income), data = dat, refresh = 0, iter = 200, chains = 2) | |
fit2 <- lm(ls ~ income, data = dat) | |
fit3 <- lm(ls ~ income2, data = dat) | |
fit4 <- lm(ls ~ income3, data = dat) | |
fit5 <- lm(ls ~ poly(income_n, 3), data = dat) | |
model_parameters(fit1) | |
#> # Fixed effects | |
#> | |
#> Parameter | Median | 95% CI | pd | % in ROPE | Rhat | ESS | |
#> ------------------------------------------------------------------------ | |
#> (Intercept) | 27.34 | [23.35, 29.78] | 100% | 0% | 0.991 | 51.00 | |
#> moincome | 15.76 | [14.76, 17.36] | 100% | 0% | 0.995 | 64.00 | |
#> | |
#> # Fixed effects (monotonic effects) | |
#> | |
#> Parameter | Median | 95% CI | pd | % in ROPE | Rhat | ESS | |
#> ---------------------------------------------------------------------- | |
#> income1[1] | 0.72 | [0.66, 0.80] | 100% | 100% | 1.005 | 187.00 | |
#> income1[2] | 0.21 | [0.11, 0.30] | 100% | 100% | 0.994 | 151.00 | |
#> income1[3] | 0.06 | [0.00, 0.12] | 100% | 100% | 0.996 | 107.00 | |
#> | |
#> Uncertainty intervals (highest-density) and p values (two-tailed) computed | |
#> using a MCMC distribution approximation. | |
compare_parameters("ordinal" = fit2, "successive diff" = fit3, | |
"categorical" = fit4, "poly" = fit5) | |
#> Parameter | ordinal | successive diff | categorical | poly | |
#> ----------------------------------------------------------------------------------------------------------------------- | |
#> (Intercept) | 58.85 ( 57.46, 60.24) | 58.85 (57.46, 60.24) | 27.21 (24.45, 29.96) | 60.30 ( 58.97, 61.62) | |
#> income (linear) | 34.03 ( 31.50, 36.57) | | | | |
#> income (quadratic) | -15.92 (-18.69, -13.15) | | | | |
#> income (cubic) | 3.81 ( 0.81, 6.80) | | | | |
#> income2 (2-1) | | 34.55 (30.65, 38.45) | | | |
#> income2 (3-2) | | 10.12 ( 5.81, 14.42) | | | |
#> income2 (4-3) | | 2.70 (-1.24, 6.64) | | | |
#> income3 (20 to 40) | | | 34.55 (30.65, 38.45) | | |
#> income3 (40 to 100) | | | 44.66 (40.36, 48.97) | | |
#> income3 (greater 100) | | | 47.36 (43.87, 50.86) | | |
#> income n (1st degree) | | | | 169.61 (156.39, 182.84) | |
#> income n (2nd degree) | | | | -77.89 (-91.11, -64.67) | |
#> income n (3rd degree) | | | | 16.82 ( 3.60, 30.04) | |
#> ----------------------------------------------------------------------------------------------------------------------- | |
#> Observations | 100 | 100 | 100 | 100 | |
# brms | |
ggpredict(fit1, "income") | |
#> # Predicted values of ls | |
#> | |
#> income | Predicted | 95% CI | |
#> ---------------------------------------- | |
#> below_20 | 27.34 | [23.74, 30.13] | |
#> 20_to_40 | 61.65 | [58.90, 64.53] | |
#> 40_to_100 | 71.72 | [68.14, 74.71] | |
#> greater_100 | 74.68 | [72.39, 76.82] | |
# raw ordinal | |
ggpredict(fit2, "income") | |
#> # Predicted values of ls | |
#> | |
#> income | Predicted | 95% CI | |
#> ---------------------------------------- | |
#> below_20 | 27.21 | [24.48, 29.93] | |
#> 20_to_40 | 61.75 | [59.03, 64.48] | |
#> 40_to_100 | 71.87 | [68.61, 75.13] | |
#> greater_100 | 74.57 | [72.45, 76.69] | |
# ordinal, successive differences | |
ggpredict(fit3, "income2") | |
#> # Predicted values of ls | |
#> | |
#> income2 | Predicted | 95% CI | |
#> ---------------------------------------- | |
#> below_20 | 27.21 | [24.48, 29.93] | |
#> 20_to_40 | 61.75 | [59.03, 64.48] | |
#> 40_to_100 | 71.87 | [68.61, 75.13] | |
#> greater_100 | 74.57 | [72.45, 76.69] | |
# categorical | |
ggpredict(fit4, "income3") | |
#> # Predicted values of ls | |
#> | |
#> income3 | Predicted | 95% CI | |
#> ---------------------------------------- | |
#> below_20 | 27.21 | [24.48, 29.93] | |
#> 20_to_40 | 61.75 | [59.03, 64.48] | |
#> 40_to_100 | 71.87 | [68.61, 75.13] | |
#> greater_100 | 74.57 | [72.45, 76.69] | |
# poly | |
ggpredict(fit5, "income_n") | |
#> # Predicted values of ls | |
#> | |
#> income_n | Predicted | 95% CI | |
#> ------------------------------------- | |
#> 1 | 27.21 | [24.48, 29.93] | |
#> 2 | 61.75 | [59.03, 64.48] | |
#> 3 | 71.87 | [68.61, 75.13] | |
#> 4 | 74.57 | [72.45, 76.69] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment