ito4303 · February 16, 2023 08:31
diff --git a/zib_regression.Rmd b/zib_regression.Rmd
 ---
 title: "Zero-inflated beta regression"
 output: html_notebook
 ---

 ## Setup

 ```{r setup}
 library(ggplot2)
 library(zoib)
 library(cmdstanr)
 library(posterior)
 library(bayesplot)
 ```

 ### Data generation

 $$
 z \sim \mathrm{Bern}(p) \\
 \mathrm{logit}(p) = \alpha_0 + \alpha_1 x \\
 y = 0 \quad \text{if} \, z = 0 \\
 y \sim \mathrm{Beta}(\mu, \kappa) \quad \text{if} \, z = 1\\
 \mathrm{logit}(\mu) = \beta_0 + \beta_1 x
 $$


 ```{r}
 set.seed(1234)
 inv_logit <- function(x) {
  1 / (1 + exp(-x))
 }
 N <- 100
 X <- runif(N, 0, 10)
 p <- inv_logit(-2 + 2 * X)
 mu <-inv_logit(-4.5 + 0.7 * X)
 kappa <- 6
 alpha <- mu * kappa
 beta <- (1 - mu) * kappa
 z <- rbinom(N, 1, p)
 Y <- z * rbeta(N, alpha, beta)
 sim_data <- data.frame(X = X, Y = Y)
 ```

 ### View data

 ```{r}
 p0 <- ggplot(sim_data, aes(x = X, y = Y)) +
  geom_point()
 print(p0)
 ```

 ## Models

 ### Linear regression

 ```{r}
 fit1 <- lm(Y ~ X, data = sim_data)
 summary(fit1)
 ```

 Plot results

 ```{r}
 p0 +
  geom_abline(intercept = coef(fit1)[1],
              slope = coef(fit1)[2])
 ```

 ## Zero-inflated beta regression using the zoib package

 ```{r}
 fit2 <- zoib(Y ~ X | 1 | X, data = sim_data,
             zero.inflation = TRUE, one.inflation = FALSE,
             n.chain = 3, n.iter = 10000, n.burn = 2000, n.thin = 8)
 samp <- fit2$coeff
 traceplot(samp)
 summary(samp)
 ```

 Check JAGS model

 ```{r}
 fit2$MCMC.model
 ```

 If y is smaller than 0.0001, y is treated as 0.


 ## Zero-inflated beta regression using Stan

 ```{r}
 model_file <- file.path("models", "zib_regression.stan")
 model <- cmdstan_model(model_file)
 stan_data <- list(N = N, X = X, Y = Y)
 fit3 <- model$sample(data = stan_data)
 fit3$print(variables = c("alpha", "beta", "kappa"), digits = 2)
 ```


 Posterior predictive check

 ```{r}
 yrep <- fit3$draws("yrep") |>
  as_draws_matrix()
 ppc_dens_overlay(y = Y, yrep = yrep[1:100, ])
 ```
diff --git a/zib_regression.stan b/zib_regression.stan
 data {
  int<lower=0> N;               // number of data points
  vector[N] X;                  // explanatory variable
  vector<lower=0,upper=1>[N] Y; // objective variable
 }

 parameters {
  array[2] real alpha; // intercept and slope in the probability model
                       // (logit scale)
  array[2] real beta;  // intercept and slope in the mean model (logit scale)
  real<lower=0> kappa; // precision parameter
 }

 transformed parameters {
  vector[N] logit_p = alpha[1] + alpha[2] * X;
  vector<lower=0,upper=1>[N] mu = inv_logit(beta[1] + beta[2] * X);
 }

 model {
  for (n in 1:N) {
    if (Y[n] == 0)
      target += bernoulli_logit_lpmf(0 | logit_p[n]);
    else
      target += bernoulli_logit_lpmf(1 | logit_p[n])
                  + beta_proportion_lpdf(Y[n] | mu[n], kappa);
  }

  // priors
  alpha ~ normal(0, 10);
  beta ~ normal(0, 10);
 }

 generated quantities {
  vector<lower=0,upper=1>[N] yrep;

  for (n in 1:N) {
    int z = bernoulli_logit_rng(logit_p[n]);
    yrep[n] = z * beta_proportion_rng(mu[n], kappa);
  }
 }
	---
	title: "Zero-inflated beta regression"
	output: html_notebook
	---

	## Setup

	```{r setup}
	library(ggplot2)
	library(zoib)
	library(cmdstanr)
	library(posterior)
	library(bayesplot)
	```

	### Data generation

	$$
	z \sim \mathrm{Bern}(p) \\
	\mathrm{logit}(p) = \alpha_0 + \alpha_1 x \\
	y = 0 \quad \text{if} \, z = 0 \\
	y \sim \mathrm{Beta}(\mu, \kappa) \quad \text{if} \, z = 1\\
	\mathrm{logit}(\mu) = \beta_0 + \beta_1 x
	$$


	```{r}
	set.seed(1234)
	inv_logit <- function(x) {
	1 / (1 + exp(-x))
	}
	N <- 100
	X <- runif(N, 0, 10)
	p <- inv_logit(-2 + 2 * X)
	mu <-inv_logit(-4.5 + 0.7 * X)
	kappa <- 6
	alpha <- mu * kappa
	beta <- (1 - mu) * kappa
	z <- rbinom(N, 1, p)
	Y <- z * rbeta(N, alpha, beta)
	sim_data <- data.frame(X = X, Y = Y)
	```

	### View data

	```{r}
	p0 <- ggplot(sim_data, aes(x = X, y = Y)) +
	geom_point()
	print(p0)
	```

	## Models

	### Linear regression

	```{r}
	fit1 <- lm(Y ~ X, data = sim_data)
	summary(fit1)
	```

	Plot results

	```{r}
	p0 +
	geom_abline(intercept = coef(fit1)[1],
	slope = coef(fit1)[2])
	```

	## Zero-inflated beta regression using the zoib package

	```{r}
	fit2 <- zoib(Y ~ X \| 1 \| X, data = sim_data,
	zero.inflation = TRUE, one.inflation = FALSE,
	n.chain = 3, n.iter = 10000, n.burn = 2000, n.thin = 8)
	samp <- fit2$coeff
	traceplot(samp)
	summary(samp)
	```

	Check JAGS model

	```{r}
	fit2$MCMC.model
	```

	If y is smaller than 0.0001, y is treated as 0.


	## Zero-inflated beta regression using Stan

	```{r}
	model_file <- file.path("models", "zib_regression.stan")
	model <- cmdstan_model(model_file)
	stan_data <- list(N = N, X = X, Y = Y)
	fit3 <- model$sample(data = stan_data)
	fit3$print(variables = c("alpha", "beta", "kappa"), digits = 2)
	```


	Posterior predictive check

	```{r}
	yrep <- fit3$draws("yrep") \|>
	as_draws_matrix()
	ppc_dens_overlay(y = Y, yrep = yrep[1:100, ])
	```
	data {
	int<lower=0> N; // number of data points
	vector[N] X; // explanatory variable
	vector<lower=0,upper=1>[N] Y; // objective variable
	}

	parameters {
	array[2] real alpha; // intercept and slope in the probability model
	// (logit scale)
	array[2] real beta; // intercept and slope in the mean model (logit scale)
	real<lower=0> kappa; // precision parameter
	}

	transformed parameters {
	vector[N] logit_p = alpha[1] + alpha[2] * X;
	vector<lower=0,upper=1>[N] mu = inv_logit(beta[1] + beta[2] * X);
	}

	model {
	for (n in 1:N) {
	if (Y[n] == 0)
	target += bernoulli_logit_lpmf(0 \| logit_p[n]);
	else
	target += bernoulli_logit_lpmf(1 \| logit_p[n])
	+ beta_proportion_lpdf(Y[n] \| mu[n], kappa);
	}

	// priors
	alpha ~ normal(0, 10);
	beta ~ normal(0, 10);
	}

	generated quantities {
	vector<lower=0,upper=1>[N] yrep;

	for (n in 1:N) {
	int z = bernoulli_logit_rng(logit_p[n]);
	yrep[n] = z * beta_proportion_rng(mu[n], kappa);
	}
	}