
Simple binomial example (analytical approach)

Setting

  • Observation \(Y \sim Bin(N, \theta)\)
  • Prior \(\theta \sim Beta(a,b)\)

\[ \begin{array}{l} f(y|\theta) = \binom{N}{y} \theta^y (1-\theta)^{N-y} \\ \pi(\theta) = \frac{1}{B(a,b)} \theta^{a-1} (1-\theta)^{b-1} \end{array} \]

\(\Longrightarrow\)

  • Posterior \(\theta|y \sim Beta(y+a,\,N-y+b)\)

\[ \begin{array}{lcl} \pi(\theta|y) &=& \dfrac{f(y|\theta)\, \pi(\theta)}{f(y)} = \dfrac{f(y|\theta)\, \pi(\theta)}{\int_0^1 f(y|\theta)\, \pi(\theta)\, d\theta} \\ &=& \dfrac{\binom{N}{y} \theta^y (1-\theta)^{N-y} \frac{1}{B(a,b)} \theta^{a-1} (1-\theta)^{b-1}} {\int_0^1 \binom{N}{y} \theta^y (1-\theta)^{N-y} \frac{1}{B(a,b)} \theta^{a-1} (1-\theta)^{b-1} d\theta} \\ &=& \dfrac{1}{B(y+a,\,N-y+b)} \ \theta^{y+a-1} (1-\theta)^{N-y+b-1} \end{array} \]
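As a quick numerical sanity check (a sketch, not part of the original notes; all names below are illustrative), the conjugate update can be verified by normalizing the likelihood-prior product on a grid and comparing it with the analytical Beta density.

# Sanity check (illustrative): grid-normalized likelihood x prior vs. Beta(y+a, N-y+b)
N <- 10; y <- 2; a <- 1; b <- 1
th <- seq(0.001, 0.999, 0.001)
unnorm <- dbinom(y, N, th) * dbeta(th, a, b)
num.post <- unnorm / sum(unnorm * 0.001)    # normalize on the grid
max(abs(num.post - dbeta(th, y+a, N-y+b)))  # should be near 0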

Draw prior and posterior distributions

par(mfrow=c(2,2))

# Two data sets with the same observed proportion (2/10 and 10/50)
N1<-10; y1<-2
N2<-50; y2<-10
theta <- seq(0.001,0.999,0.001)  # plotting grid over (0,1)

# non-informative prior Beta(1,1)
a <- 1; b<-1
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf", ylim=c(0,8), main="(A) Prior Beta(1,1)")
points(theta, dbeta(theta, y1+a, N1-y1+b), type="l", lty=2, col="blue")  # posterior, N=10, y=2
points(theta, dbeta(theta, y2+a, N2-y2+b), type="l", lty=3, col="red")   # posterior, N=50, y=10

# Informative prior Beta(2,1)
a <- 2; b<-1
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf",ylim=c(0,8), main="(B) Prior Beta(2,1)")
points(theta, dbeta(theta, y1+a, N1-y1+b), type="l", lty=2, col="blue")
points(theta, dbeta(theta, y2+a, N2-y2+b), type="l", lty=3, col="red")

# Informative prior Beta(2,3)
a <- 2; b<-3
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf",ylim=c(0,8), main="(C) Prior Beta(2,3)")
points(theta, dbeta(theta, y1+a, N1-y1+b), type="l", lty=2, col="blue")
points(theta, dbeta(theta, y2+a, N2-y2+b), type="l", lty=3, col="red")

# Non-informative prior Beta(1/2,1/2) (Jeffreys prior)
a <- b <- 1/2
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf",ylim=c(0,8), main="(D) Prior Beta(1/2,1/2)")
points(theta, dbeta(theta, y1+a, N1-y1+b), type="l", lty=2, col="blue")
points(theta, dbeta(theta, y2+a, N2-y2+b), type="l", lty=3, col="red")
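In each panel the solid black curve is the prior and the dashed blue and dotted red curves are the two posteriors. A legend call like the following could be added after each panel's plotting commands (a sketch; labels inferred from the data above):

legend("topright", legend=c("prior", "posterior (N=10, y=2)", "posterior (N=50, y=10)"),
       lty=1:3, col=c("black", "blue", "red"), cex=0.7)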

Function for creating a 95% credible interval (equal tail)

CI_1 <- function(a, b, N, y, level){
 # Equal-tail interval: cut (1-level)/2 probability from each tail of Beta(y+a, N-y+b)
 percentile <- c((1-level)/2, 1-(1-level)/2)
 CI <- qbeta(percentile, y+a, N-y+b)
 CI
}

Function for creating a 95% credible interval (grid-based HPDI)

CI_2 <- function(a, b, N, y, level, split=1000){
 # Grid approximation of the highest posterior density interval (HPDI)
 theta <- seq(0,1,length.out=split+1)[-c(1,(split+1))]  # interior grid points
 PDF <- dbeta(theta, y+a, N-y+b)/split                  # approximate mass per grid cell
 clist <- seq(0.00001,max(PDF),length.out=100)          # candidate density cutoffs
 SS <- numeric(100)
 for(i in 1:100){ SS[i] <- sum(PDF[PDF > clist[i]]) }   # total mass above each cutoff
 Num <- which.min(abs(SS-level))                        # cutoff whose mass is closest to level
 INT <- theta[PDF > clist[Num]]
 CI <- c(min(INT), max(INT))                            # endpoints (assumes a unimodal posterior)
 CI
}

Bayes estimate from posterior distribution

#Example data
N1 <- 20
y1 <- 4
a <- b <- 1

#Posterior
par(mfrow=c(1,1))
plot(theta, dbeta(theta, y1+a, N1-y1+b), type="l", ylab="pdf")

theta.mean<-(a+y1)/(N1+a+b)        # posterior mean of Beta(y+a, N-y+b)
points(theta.mean, 0, pch=4, col="red")
theta.mode<-(a+y1-1)/(N1+a+b-2)    # posterior mode
points(theta.mode, 0, pch=4, col="blue")
lines(c(theta.mode, theta.mode), c(0,dbeta(theta.mode, y1+a, N1-y1+b)), lty=2, col="blue")

CI_1.res <- CI_1(a=a, b=b, N=N1, y=y1, level=0.95)
CI_1.res
[1] 0.08217588 0.41906604
lines(CI_1.res, c(0,0), lwd=2, col="green")  # equal-tail interval

CI_2.res <- CI_2(a=a, b=b, N=N1, y=y1, level=0.95)
CI_2.res
[1] 0.070 0.398
lines(CI_2.res, c(0.1,0.1), lwd=2, col="blue")  # HPDI, drawn slightly above for visibility
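Beyond the mean and mode, other posterior summaries follow directly from the Beta posterior; for instance (a small added sketch, using the standard Beta quantile and variance formulas):

# Posterior median and standard deviation of Beta(y+a, N-y+b)
theta.median <- qbeta(0.5, y1+a, N1-y1+b)
theta.sd <- sqrt((y1+a)*(N1-y1+b) / ((N1+a+b)^2 * (N1+a+b+1)))
c(theta.median, theta.sd)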


Simple binomial example (simulation approach)

Simulation-based posterior calculation:

  • Grid method
  • Sampling importance resampling (SIR) method
  • Markov chain Monte Carlo (MCMC) methods
  • etc.

Grid method

For \(\theta_1 < \theta_2 < \cdots < \theta_k\), evaluate the likelihood and prior at each grid point, and then compute the following values to obtain the posterior numerically. \[ \pi(\theta_i | y) = \frac{f(y|\theta_i)\, \pi(\theta_i)}{\sum_{j=1}^k f(y|\theta_j)\, \pi(\theta_j)} \]

N1 <- 10
y1 <- 2
a <- b <- 1
Increment <- 0.001
theta <- seq(0.001,0.999,Increment)
like <- dbinom(y1, N1, theta)                       # likelihood at each grid point
prior <- dbeta(theta, a, b)                         # prior at each grid point
posterior1 <- like*prior/sum(Increment*like*prior)  # normalize to a density on the grid
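The grid approximation can also be used to draw approximate posterior samples directly, which foreshadows the resampling idea in SIR below; a minimal sketch (not in the original code):

# Draw approximate posterior samples from the grid, with weights proportional to like*prior
theta.grid.sample <- sample(theta, 10^3, prob=like*prior, replace=TRUE)
hist(theta.grid.sample, freq=FALSE)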

SIR

  1. Draw \(m\) samples, \(\theta_1,...,\theta_m\), from a proposal distribution \(g(\theta)\)
  2. Calculate the (relative) weight vector as \[ \omega_i = \frac{\pi(\theta_i|y)/g(\theta_i)}{\sum_{j=1}^m \pi(\theta_j|y)/g(\theta_j)} = \frac{f(y|\theta_i)\, \pi(\theta_i)/g(\theta_i)}{\sum_{j=1}^m f(y|\theta_j)\, \pi(\theta_j)/g(\theta_j)} \]
  3. Resample \(n\) \((\ll m)\) samples, \(\tilde{\theta}_1,\ldots,\tilde{\theta}_n\), from the original samples, \(\theta_1,\ldots,\theta_m\), with replacement, with probabilities given by \(\omega_1,\ldots,\omega_m\).

Note: If \(g(\theta)=\pi(\theta)\), then the weight is just proportional to the likelihood: \[ \omega_i = \frac{f(y|\theta_i)}{\sum_{j=1}^m f(y|\theta_j)}. \]

# Proposal = prior Beta(1,1), so the weights reduce to the likelihood (see note above)
MM <- 10^5   # number of proposal draws (m)
NN <- 10^3   # number of resampled draws (n)
theta.sample <- rbeta(MM, a, b)
weight <- dbinom(y1, N1, theta.sample)
theta.resample <- sample(theta.sample, NN, prob=weight, replace=TRUE)

plot(theta, dbeta(theta, a+y1, b+N1-y1), type="l", ylab="Density")  # analytical posterior
points(theta, posterior1, type="l", lty=2, col="blue")  # grid approximation, overlaid
hist(theta.resample, add=T, freq=F)  # SIR draws match the analytical posterior well

MCMC (perhaps to come in the class sometime in autumn)

Perhaps we will use WinBUGS, JAGS, Stan, etc.
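As a preview of what such software does under the hood, here is a minimal random-walk Metropolis sketch for the same Beta-binomial posterior (an illustration only, not part of the class code; the starting value, proposal scale 0.1, iteration count, and burn-in length are arbitrary choices):

# Random-walk Metropolis targeting pi(theta|y) for y1=2, N1=10, a=b=1 (values set above)
set.seed(1)
n.iter <- 10^4
chain <- numeric(n.iter)
chain[1] <- 0.5
for(t in 2:n.iter){
  prop <- chain[t-1] + rnorm(1, 0, 0.1)  # symmetric proposal
  if(prop <= 0 || prop >= 1){ chain[t] <- chain[t-1]; next }  # zero density outside (0,1)
  logr <- dbinom(y1, N1, prop, log=TRUE) + dbeta(prop, a, b, log=TRUE) -
          dbinom(y1, N1, chain[t-1], log=TRUE) - dbeta(chain[t-1], a, b, log=TRUE)
  chain[t] <- if(log(runif(1)) < logr) prop else chain[t-1]  # accept/reject
}
hist(chain[-(1:1000)], freq=FALSE, main="Metropolis draws")  # drop burn-in
lines(theta, dbeta(theta, a+y1, b+N1-y1), col="red")         # analytical posterior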