Last active
August 29, 2015 14:06
-
-
Save dmarcelinobr/9cab589e474dd09dadbc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data latest polls | |
polls = NULL | |
polls <- data.frame( rbind( | |
Opinium = c(43, 47, 1156), | |
Survation = c(44, 48, 1000), | |
ICM = c(41, 45, 1175) | |
)) | |
# set up for decimals | |
polls[, 1:2] <- polls[, 1:2]/100 | |
# weighting polls | |
wtd.polls <- rbind(polls, c(apply(polls[,1:2],2, weighted.mean, polls[,3]), sum(polls[,3]))) | |
names(wtd.polls) <- c("Yes","No","n") | |
wtd.polls$Others = 1-wtd.polls$Yes-wtd.polls$No | |
# Adjusting for the undecideds for the final results | |
wtd.polls[5,] <- data.frame(wtd.polls[4,1:2] + wtd.polls[4,1:2]/ sum(wtd.polls[4,1:2])*wtd.polls[4,4],n=wtd.polls[4,3],Others=0) | |
################################################## | |
## If we have more than two categories (Yes, No, and DK), Beta distribution won't work properly. | |
## The following is a function which I used to draw from a dirichlet distribution. | |
##The Dirichlet distribution is the multidimensional generalization of the beta distribution. It's the canonical Bayesian distribution for the parameter estimates of a multinomial distribution. | |
## The Dirichlet function returns a matrix with n rows, each containing a single Dirichlet random deviate. | |
################################################## | |
row= 5 | |
prob2win = function(row,export=1){ | |
p=rdirichlet(100000, | |
wtd.polls$n[row] * | |
c(wtd.polls$Yes[row], wtd.polls$No[row], 1-wtd.polls$Yes[row]-wtd.polls$No[row])+1 | |
) | |
if(export==1){ | |
mean(p[,1]<p[,2]) ## No exceeds Yes? | |
} else { | |
return(p) | |
} | |
} | |
(No.win.probs = sapply(1:nrow(wtd.polls),prob2win)) | |
## set simulated data for determining parameters and graphing | |
p = prob2win(row= 5, export=0) | |
## Get the marginal distribution. Since we have two classes the Dirichlet is distributed as a Beta. | |
p = cbind(p, p[,2]-p[,1]) | |
## Draw a histogram of simulated data | |
hist(p[,5], col="gray", nclass=50, main="Histogram of the Yes/No Differences", xlab="Yes/No Difference") | |
abline(v=0, col=c('red'), lwd=2, lty=c(1)) | |
## In this case I ran the function a few time to set the start value close to what I believe be the output. | |
## So, you have to adjust the shape parameters (shape1 and shape2) manually; it's much of a trial and error exercise, | |
## but you can use the function beta.par from SciencesPo package to estimate these parameters using \mu and \sigma^2. | |
library(MCMCpack) | |
fit1 = fitdistr(p[,1], "beta", | |
start=list(shape1=455,shape2=555)) | |
fit2 = fitdistr(p[,2], "beta", | |
start=list(shape1=545,shape2=455)) | |
library(png) | |
#Replace the directory and file information with your info | |
png <- readPNG(file.choose()) | |
#Get the plot information so the image will fill the plot box, and draw it | |
lim <- par() | |
rasterImage(png, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4]) | |
grid() | |
## Draw densities of simulated data | |
curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), ylim=c(0,50), xlim=c(.40,.60), col='NA', lty=1, lwd=3, ylab="Density", xlab=expression("Posterior "*pi(theta)*""), main="Distribution of the Yes/No Preference Referendum 2014", sub=paste("Probability that NO wins: ", round(No.win.probs[5],3) ) ) ## yes 1 | |
curve(dbeta(x,fit1$estimate[1],fit1$estimate[2]), xlim=c(.43,.52), col='green', lty=1, add=TRUE, lwd=8) ## yes 1 | |
curve(dbeta(x,fit2$estimate[1],fit2$estimate[2]), add=TRUE, col='magenta', xlim=c(.49,.56), lty=1, lwd=8) ## No 2 | |
abline(v=c(median(p[,1]), median(p[,2])), col=c('green','magenta'), lwd=2, lty=c(2,2)) | |
legend("topright",c("Yes","No"), lwd=2, col=c('green','magenta'), lty=c(1,1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment