dmarcelinobr · August 29, 2015 14:06
diff --git a/Scotland2.R b/Scotland2.R
 # Data latest polls
 polls = NULL
 polls <- data.frame( rbind(
  Opinium = c(43, 47, 1156),
  Survation = c(44, 48, 1000),
  ICM =  c(41, 45, 1175)
 ))

 # set up for decimals
 polls[, 1:2] <- polls[, 1:2]/100

 # weighting polls 
 wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))

 names(wtd.polls) <- c("Yes","No","n")

 wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No

 # Adjusting for the undecideds for the final results
 wtd.polls[5,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],Others=0)

 ##################################################
 ## If we have more than two categories (Yes, No, and DK), Beta distribution won't work properly.
 ## The following is a function which I used to draw from a dirichlet distribution.
 ##The Dirichlet distribution is the multidimensional generalization of the beta distribution. It's the canonical Bayesian distribution for the parameter estimates of a multinomial distribution.
 ## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate.
 ##################################################
 row= 5
 prob2win = function(row,export=1){
  p=rdirichlet(100000,
  wtd.polls$n[row] *
  c(wtd.polls$Yes[row], wtd.polls$No[row], 1-wtd.polls$Yes[row]-wtd.polls$No[row])+1
  )
  if(export==1){
    mean(p[,1]<p[,2]) ## No exceeds Yes?
  } else {
    return(p)
  }
 }


 (No.win.probs = sapply(1:nrow(wtd.polls),prob2win))



 ## set simulated data for determining parameters and graphing 
 p = prob2win(row= 5, export=0)

 ## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
 p = cbind(p, p[,2]-p[,1])

 ## Draw a histogram of simulated data
 hist(p[,5], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
 abline(v=0, col=c('red'), lwd=2, lty=c(1))

 ## In this case I ran the function a few time to set the start value close to what I believe be the output.
 ## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise, 
 ## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
 library(MCMCpack)
 fit1 = fitdistr(p[,1], "beta",
                  start=list(shape1=455,shape2=555))

 fit2 = fitdistr(p[,2], "beta",
                    start=list(shape1=545,shape2=455))

 library(png)
 #Replace the directory and file information with your info
 png <- readPNG(file.choose())

 #Get the plot information so the image will fill the plot box, and draw it
 lim <- par()
 rasterImage(png, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
 grid()

 ## Draw densities of simulated data
 curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,50), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "*pi(theta)*""), main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(No.win.probs[5],3) ) ) ## yes 1

 curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.43,.52), col='green', lty=1, add=TRUE, lwd=8) ## yes 1

 curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='magenta', xlim=c(.49,.56), lty=1, lwd=8) ## No 2

 abline(v=c(median(p[,1]), median(p[,2])), col=c('green','magenta'), lwd=2, lty=c(2,2))
 legend("topright",c("Yes","No"), lwd=2, col=c('green','magenta'), lty=c(1,1))
	# Data latest polls
	polls = NULL
	polls <- data.frame( rbind(
	Opinium = c(43, 47, 1156),
	Survation = c(44, 48, 1000),
	ICM = c(41, 45, 1175)
	))

	# set up for decimals
	polls[, 1:2] <- polls[, 1:2]/100

	# weighting polls
	wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3])))

	names(wtd.polls) <- c("Yes","No","n")

	wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No

	# Adjusting for the undecideds for the final results
	wtd.polls[5,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],Others=0)

	##################################################
	## If we have more than two categories (Yes, No, and DK), Beta distribution won't work properly.
	## The following is a function which I used to draw from a dirichlet distribution.
	##The Dirichlet distribution is the multidimensional generalization of the beta distribution. It's the canonical Bayesian distribution for the parameter estimates of a multinomial distribution.
	## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate.
	##################################################
	row= 5
	prob2win = function(row,export=1){
	p=rdirichlet(100000,
	wtd.polls$n[row] *
	c(wtd.polls$Yes[row], wtd.polls$No[row], 1-wtd.polls$Yes[row]-wtd.polls$No[row])+1
	)
	if(export==1){
	mean(p[,1]<p[,2]) ## No exceeds Yes?
	} else {
	return(p)
	}
	}


	(No.win.probs = sapply(1:nrow(wtd.polls),prob2win))



	## set simulated data for determining parameters and graphing
	p = prob2win(row= 5, export=0)

	## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta.
	p = cbind(p, p[,2]-p[,1])

	## Draw a histogram of simulated data
	hist(p[,5], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference")
	abline(v=0, col=c('red'), lwd=2, lty=c(1))

	## In this case I ran the function a few time to set the start value close to what I believe be the output.
	## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise,
	## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2.
	library(MCMCpack)
	fit1 = fitdistr(p[,1], "beta",
	start=list(shape1=455,shape2=555))

	fit2 = fitdistr(p[,2], "beta",
	start=list(shape1=545,shape2=455))

	library(png)
	#Replace the directory and file information with your info
	png <- readPNG(file.choose())

	#Get the plot information so the image will fill the plot box, and draw it
	lim <- par()
	rasterImage(png, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
	grid()

	## Draw densities of simulated data
	curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,50), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "pi(theta)""), main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(No.win.probs[5],3) ) ) ## yes 1

	curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.43,.52), col='green', lty=1, add=TRUE, lwd=8) ## yes 1

	curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='magenta', xlim=c(.49,.56), lty=1, lwd=8) ## No 2

	abline(v=c(median(p[,1]), median(p[,2])), col=c('green','magenta'), lwd=2, lty=c(2,2))
	legend("topright",c("Yes","No"), lwd=2, col=c('green','magenta'), lty=c(1,1))