Last active
February 16, 2023 08:31
-
-
Save ito4303/4d5f777b684c0084a4aae0884607bcdd to your computer and use it in GitHub Desktop.
Zero-inflated beta regression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Zero-inflated beta regression" | |
output: html_notebook | |
--- | |
## Setup | |
```{r setup} | |
library(ggplot2) | |
library(zoib) | |
library(cmdstanr) | |
library(posterior) | |
library(bayesplot) | |
``` | |
### Data generation | |
$$ | |
z \sim \mathrm{Bern}(p) \\ | |
\mathrm{logit}(p) = \alpha_0 + \alpha_1 x \\ | |
y = 0 \quad \text{if} \, z = 0 \\ | |
y \sim \mathrm{Beta}(\mu, \kappa) \quad \text{if} \, z = 1\\ | |
\mathrm{logit}(\mu) = \beta_0 + \beta_1 x | |
$$ | |
```{r} | |
set.seed(1234) | |
inv_logit <- function(x) { | |
1 / (1 + exp(-x)) | |
} | |
N <- 100 | |
X <- runif(N, 0, 10) | |
p <- inv_logit(-2 + 2 * X) | |
mu <-inv_logit(-4.5 + 0.7 * X) | |
kappa <- 6 | |
alpha <- mu * kappa | |
beta <- (1 - mu) * kappa | |
z <- rbinom(N, 1, p) | |
Y <- z * rbeta(N, alpha, beta) | |
sim_data <- data.frame(X = X, Y = Y) | |
``` | |
### View data | |
```{r} | |
p0 <- ggplot(sim_data, aes(x = X, y = Y)) + | |
geom_point() | |
print(p0) | |
``` | |
## Models | |
### Linear regression | |
```{r} | |
fit1 <- lm(Y ~ X, data = sim_data) | |
summary(fit1) | |
``` | |
Plot results | |
```{r} | |
p0 + | |
geom_abline(intercept = coef(fit1)[1], | |
slope = coef(fit1)[2]) | |
``` | |
## Zero-inflated beta regression using the zoib package | |
```{r} | |
fit2 <- zoib(Y ~ X | 1 | X, data = sim_data, | |
zero.inflation = TRUE, one.inflation = FALSE, | |
n.chain = 3, n.iter = 10000, n.burn = 2000, n.thin = 8) | |
samp <- fit2$coeff | |
traceplot(samp) | |
summary(samp) | |
``` | |
Check JAGS model | |
```{r} | |
fit2$MCMC.model | |
``` | |
If y is smaller than 0.0001, y is treated as 0. | |
## Zero-inflated beta regression using Stan | |
```{r} | |
model_file <- file.path("models", "zib_regression.stan") | |
model <- cmdstan_model(model_file) | |
stan_data <- list(N = N, X = X, Y = Y) | |
fit3 <- model$sample(data = stan_data) | |
fit3$print(variables = c("alpha", "beta", "kappa"), digits = 2) | |
``` | |
Posterior predictive check | |
```{r} | |
yrep <- fit3$draws("yrep") |> | |
as_draws_matrix() | |
ppc_dens_overlay(y = Y, yrep = yrep[1:100, ]) | |
``` |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
int<lower=0> N; // number of data points | |
vector[N] X; // explanatory variable | |
vector<lower=0,upper=1>[N] Y; // objective variable | |
} | |
parameters { | |
array[2] real alpha; // intercept and slope in the probability model | |
// (logit scale) | |
array[2] real beta; // intercept and slope in the mean model (logit scale) | |
real<lower=0> kappa; // precision parameter | |
} | |
transformed parameters { | |
vector[N] logit_p = alpha[1] + alpha[2] * X; | |
vector<lower=0,upper=1>[N] mu = inv_logit(beta[1] + beta[2] * X); | |
} | |
model { | |
for (n in 1:N) { | |
if (Y[n] == 0) | |
target += bernoulli_logit_lpmf(0 | logit_p[n]); | |
else | |
target += bernoulli_logit_lpmf(1 | logit_p[n]) | |
+ beta_proportion_lpdf(Y[n] | mu[n], kappa); | |
} | |
// priors | |
alpha ~ normal(0, 10); | |
beta ~ normal(0, 10); | |
} | |
generated quantities { | |
vector<lower=0,upper=1>[N] yrep; | |
for (n in 1:N) { | |
int z = bernoulli_logit_rng(logit_p[n]); | |
yrep[n] = z * beta_proportion_rng(mu[n], kappa); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment