englianhu · August 21, 2017 03:24
diff --git a/bivar_pois.R b/bivar_pois.R
 ## Simple bivariate Poisson model in stan
 ## following parameterization in Karlis and Ntzoufras 2003
 ## Simulate data
 n <- 50

 # indpendent Poisson components
 theta <- c(2, 3, 1)

 X_i <- array(dim=c(n, 3))
 for (i in 1:3){
  X_i[,i] <- rpois(n, theta[i])
 }

 # summation to produce bivariate Poisson RVs
 Y_1 <- X_i[, 1] + X_i[, 3]
 Y_2 <- X_i[, 2] + X_i[, 3]

 Y <- matrix(c(Y_1, Y_2), 
            ncol=n, 
            byrow=T)

 # Trick to generate a ragged array-esque set of indices for interior sum
 # we want to use a matrix operation to calculate the inner sum 
 # (from 0 to min(y_1[i], y_2[i])) for each of i observations
 minima <- apply(Y, 2, min)
 u <- NULL
 which_n <- NULL
 for (i in 1:n){
  indices <- 0:minima[i]
  u <- c(u, indices)
  which_n <- c(which_n, 
               rep(i, length(indices))
               )
 }

 length_u <- length(u) # should be sum(minima) + n

 # construct matrix of indicators to ensure proper summation
 u_mat <- array(dim=c(n, length_u))
 for (i in 1:n){
  u_mat[i, ] <- ifelse(which_n == i, 1, 0)
 }

 # plot data
 library(epade)
 par(mai=c(1.5, 1, 1, 1))
 bar3d.ade(Y_1, Y_2, wall=3)

 cat(
  "
 data {
  int<lower=1> n;                 // number of observations
  vector<lower=0>[n] Y[2];        // observations
  int length_u;                   // number of u indices to get inner summand
  vector<lower=0>[length_u] u;    // u indices
  int<lower=0> which_n[length_u]; // corresponding site indices
  matrix[n, length_u] u_mat;      // matrix of indicators to facilitate summation
 }

 parameters {
  vector<lower=0>[3] theta;       // expected values for component responses
 }

 transformed parameters {
  real theta_sum;
  vector[3] log_theta;
  vector[n] u_sum;
  vector[length_u] u_terms;

  theta_sum <- sum(theta);
  log_theta <- log(theta);

  for (i in 1:length_u){
    u_terms[i] <- exp(binomial_coefficient_log(Y[1, which_n[i]], u[i]) + 
                      binomial_coefficient_log(Y[2, which_n[i]], u[i]) + 
                      lgamma(u[i] + 1) + 
                      u[i] * (log_theta[3] - log_theta[1] - log_theta[2]));
  }
  u_sum <- u_mat * u_terms;
 }

 model {
  vector[n] loglik_el;
  real loglik;
  for (i in 1:n){
    loglik_el[i] <- Y[1, i] * log_theta[1] - lgamma(Y[1, i] + 1) +  
                  Y[2, i] * log_theta[2] - lgamma(Y[2, i] + 1) + 
                  log(u_sum[i]);
  }
  loglik <- sum(loglik_el) - n*theta_sum;
  increment_log_prob(loglik);
 }
  ", file="mvpois.stan")


 # estimate parameters
 library(rstan)
 d <- list(Y=Y, n=n, length_u=length_u, u=u, u_mat=u_mat, which_n=which_n)
 init <- stan("mvpois.stan", data = d, chains=0)
 mod <- stan(fit=init, 
            data=d, 
            iter=2000, chains=3, 
            pars=c("theta"))
 traceplot(mod, "theta", inc_warmup=F)


 # evaluate parameter recovery
 post <- extract(mod)
 par(mfrow=c(1, 3))
 for (i in 1:3){
  plot(density(post$theta[, i]))
  abline(v=theta[i], col="red")
 }
 par(mfrow=c(1, 1))

 library(scales)
 pairs(rbind(post$theta, theta), 
      cex=c(rep(1, nrow(post$theta)), 2), 
      pch=c(rep(1, nrow(post$theta)), 19), 
      col=c(rep(alpha("black", .2), nrow(post$theta)), "red"), 
      labels=c(expression(theta[1]), expression(theta[2]), expression(theta[3]))
      )
	## Simple bivariate Poisson model in stan
	## following parameterization in Karlis and Ntzoufras 2003
	## Simulate data
	n <- 50

	# indpendent Poisson components
	theta <- c(2, 3, 1)

	X_i <- array(dim=c(n, 3))
	for (i in 1:3){
	X_i[,i] <- rpois(n, theta[i])
	}

	# summation to produce bivariate Poisson RVs
	Y_1 <- X_i[, 1] + X_i[, 3]
	Y_2 <- X_i[, 2] + X_i[, 3]

	Y <- matrix(c(Y_1, Y_2),
	ncol=n,
	byrow=T)

	# Trick to generate a ragged array-esque set of indices for interior sum
	# we want to use a matrix operation to calculate the inner sum
	# (from 0 to min(y_1[i], y_2[i])) for each of i observations
	minima <- apply(Y, 2, min)
	u <- NULL
	which_n <- NULL
	for (i in 1:n){
	indices <- 0:minima[i]
	u <- c(u, indices)
	which_n <- c(which_n,
	rep(i, length(indices))
	)
	}

	length_u <- length(u) # should be sum(minima) + n

	# construct matrix of indicators to ensure proper summation
	u_mat <- array(dim=c(n, length_u))
	for (i in 1:n){
	u_mat[i, ] <- ifelse(which_n == i, 1, 0)
	}

	# plot data
	library(epade)
	par(mai=c(1.5, 1, 1, 1))
	bar3d.ade(Y_1, Y_2, wall=3)

	cat(
	"
	data {
	int<lower=1> n; // number of observations
	vector<lower=0>[n] Y[2]; // observations
	int length_u; // number of u indices to get inner summand
	vector<lower=0>[length_u] u; // u indices
	int<lower=0> which_n[length_u]; // corresponding site indices
	matrix[n, length_u] u_mat; // matrix of indicators to facilitate summation
	}

	parameters {
	vector<lower=0>[3] theta; // expected values for component responses
	}

	transformed parameters {
	real theta_sum;
	vector[3] log_theta;
	vector[n] u_sum;
	vector[length_u] u_terms;

	theta_sum <- sum(theta);
	log_theta <- log(theta);

	for (i in 1:length_u){
	u_terms[i] <- exp(binomial_coefficient_log(Y[1, which_n[i]], u[i]) +
	binomial_coefficient_log(Y[2, which_n[i]], u[i]) +
	lgamma(u[i] + 1) +
	u[i] * (log_theta[3] - log_theta[1] - log_theta[2]));
	}
	u_sum <- u_mat * u_terms;
	}

	model {
	vector[n] loglik_el;
	real loglik;
	for (i in 1:n){
	loglik_el[i] <- Y[1, i] * log_theta[1] - lgamma(Y[1, i] + 1) +
	Y[2, i] * log_theta[2] - lgamma(Y[2, i] + 1) +
	log(u_sum[i]);
	}
	loglik <- sum(loglik_el) - n*theta_sum;
	increment_log_prob(loglik);
	}
	", file="mvpois.stan")


	# estimate parameters
	library(rstan)
	d <- list(Y=Y, n=n, length_u=length_u, u=u, u_mat=u_mat, which_n=which_n)
	init <- stan("mvpois.stan", data = d, chains=0)
	mod <- stan(fit=init,
	data=d,
	iter=2000, chains=3,
	pars=c("theta"))
	traceplot(mod, "theta", inc_warmup=F)


	# evaluate parameter recovery
	post <- extract(mod)
	par(mfrow=c(1, 3))
	for (i in 1:3){
	plot(density(post$theta[, i]))
	abline(v=theta[i], col="red")
	}
	par(mfrow=c(1, 1))

	library(scales)
	pairs(rbind(post$theta, theta),
	cex=c(rep(1, nrow(post$theta)), 2),
	pch=c(rep(1, nrow(post$theta)), 19),
	col=c(rep(alpha("black", .2), nrow(post$theta)), "red"),
	labels=c(expression(theta[1]), expression(theta[2]), expression(theta[3]))
	)