Lecture 4 ベイズ推定法 (1) 演習 -Binomial-
ベイズ推測法の第一歩 -2項分布における点推定-
事前分布と事後分布の定義
\[\begin{equation} f(y|\theta) = \binom{N}{y} \theta^y (1-\theta)^{N-y} \end{equation}\] \[\begin{equation} \pi(\theta) = \frac{1}{B(a,b)} \theta^{a-1} (1-\theta)^{b-1} \end{equation}\]事前分布から事後分布への更新
\[\begin{equation} \begin{array}{lcl} \pi(\theta|y) &=& \displaystyle \frac{\displaystyle f(y|\theta) \pi(\theta)}{\displaystyle f(y)} = \displaystyle \frac{f(y|\theta) \pi(\theta)}{\displaystyle \int_0^1 f(y|\theta) \pi(\theta) d\theta} \\ &=& \displaystyle \frac{\displaystyle \binom{N}{y} \theta^y (1-\theta)^{N-y} \frac{1}{B(a,b)} \theta^{a-1} (1-\theta)^{b-1}} {\displaystyle \int_0^1 \binom{N}{y} \theta^y (1-\theta)^{N-y} \frac{1}{B(a,b)} \theta^{a-1} (1-\theta)^{b-1} d\theta} \\ &=& \displaystyle \frac{1}{\displaystyle B(y+a,N+y-b)} \ \theta^{y+a-1} (1-\theta)^{N-y+b-1} \end{array} \end{equation}\]Example Data
#データ数が少ないとき
N1<-10; y1<-1
#データ数が多いとき
N2<-100; y2<-10
#図を描くためにパラメータの範囲を設定
theta <- seq(0.001,0.999,0.001)
(A) non-informative prior \(\theta \sim U(0,1)=Beta(1,1)\) から事後分布への更新
a <- 1; b<-1
par(mfrow=c(1,3))
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf", main="(A) Prior Beta(1,1)")
plot(theta, dbeta(theta, y1+a, N1-y1+b), type="l", ylab="pdf", main="Posterior with N=10, y=1")
theta.mode<-(a+y1-1)/(N1+a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y1+a, N1-y1+b)), lty=2)
plot(theta, dbeta(theta, y2+a, N2-y2+b), type="l", ylab="pdf", main="Posterior with N=100, y=10")
theta.mode<-(a+y2-1)/(N2+a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y2+a, N2-y2+b)), lty=2)
(B) \(\theta \sim Beta(24.9, 58.1)\) から事後分布への更新
par(mfrow=c(1,3))
a <- 24.9; b<-58.1
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf", main="(B) Prior Beta(24.9, 58.1)")
theta.mode<-(a-1)/(a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, a, b)), lty=2)
plot(theta, dbeta(theta, y1+a, N1-y1+b), type="l", ylab="pdf", main="Posterior with N=10, y=1")
theta.mode<-(a+y1-1)/(N1+a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y1+a, N1-y1+b)), lty=2)
plot(theta, dbeta(theta, y2+a, N2-y2+b), type="l", ylab="pdf", main="Posterior with N=100, y=10")
theta.mode<-(a+y2-1)/(N2+a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y2+a, N2-y2+b)), lty=2)
(C) \(\theta \sim Beta(2, 2)\) から事後分布への更新
par(mfrow=c(1,3))
a <- 2; b<-2
plot(theta, dbeta(theta, a, b), type="l", ylab="pdf", main="(C) Prior Beta(2,2)")
theta.mode<-(a-1)/(a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, a, b)), lty=2)
plot(theta, dbeta(theta, y1+a, N1-y1+b), type="l", ylab="pdf", main="Posterior with N=10, y=1")
theta.mode<-(a+y1-1)/(N1+a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y1+a, N1-y1+b)), lty=2)
plot(theta, dbeta(theta, y2+a, N2-y2+b), type="l", ylab="pdf", main="Posterior with N=100, y=10")
theta.mode<-(a+y2-1)/(N2+a+b-2)
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y2+a, N2-y2+b)), lty=2)
パラメータの推定値とその分散
\[\begin{equation} \displaystyle \hat{\theta}_{mean} = E[\theta|Y=y] = \int_0^1 \theta \ \pi(\theta|y) d\theta = \frac{y+a}{N+a+b} \end{equation}\] \[\begin{equation} \displaystyle E[\theta|Y=y] = \frac{a+b}{N+a+b} E[\theta] + \frac{N}{N+a+b} \hat{\theta}_{ML} \end{equation}\] \[\begin{equation} \displaystyle V[\theta|Y=y] = \int_0^1 (\theta-E[\theta|Y=y])^2 \pi(\theta|y) d\theta = \frac{1}{N+a+b+1} \cdot \frac{y+a}{N+a+b} \cdot \frac{N-y+b}{N+a+b} \end{equation}\]ベイズ推測法における区間推定
95% Credible Interval (equal tail)
CI_1 <- function(a, b, N, y, level){
percentile <- c((1-level)/2, 1-(1-level)/2)
CI <- qbeta(percentile, y+a, N-y+b)
CI
}
CI_1.res <- CI_1(a=24.9, b=58.1, N=10, y=1, level=0.95); CI_1.res
[1] 0.1926792 0.3733314
95% Credible Interval (grid HPDI)
CI_2 <- function(a, b, N, y, level, split=1000){
theta <- seq(0,1,length.out=split+1)[-c(1,(split+1))]
PDF <- dbeta(theta, y+a, N-y+b)/split
clist <- seq(0.00001,max(PDF),length.out=100)
SS <- numeric(100)
for(i in 1:100){ SS[i] <- sum(PDF[PDF > clist[i]]) }
Num <- which.min(abs(SS-level))
INT <- theta[PDF > clist[Num]]
CI <- c(min(INT), max(INT))
CI
}
CI_2.res <-CI_2(a=24.9, b=58.1, N=10, y=1, level=0.95); CI_2.res
[1] 0.190 0.369
Posterior and Credible Interval
a<-24.9; b<-58.1; N<-10; y<-1
theta.mode<-(a+y-1)/(N+a+b-2)
theta.mean<-(a+y)/(N+a+b)
theta <- seq(0.001,0.999,0.001)
plot(theta, dbeta(theta, y+a, N-y+b), type="l")
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y+a, N-y+b)), lty=2)
points(theta.mean, 0, pch=4, col="red", cex=2)
lines(CI_2.res, c(0,0), lwd=2, col="blue")
data.frame(theta.mode, theta.mean, CI.LB=CI_2.res[1], CI.UB=CI_2.res[2])
theta.mode theta.mean CI.LB CI.UB
1 0.2736264 0.2784946 0.19 0.369
a<-24.9; b<-58.1; N<-10; y<-1
theta.mode<-(a+y-1)/(N+a+b-2)
theta.mean<-(a+y)/(N+a+b)
theta <- seq(0.001,0.999,0.001)
plot(theta, dbeta(theta, y+a, N-y+b), type="l", ylab="pdf")
nn <- 100
theta.CI <- seq(from=CI_2.res[1], to=CI_2.res[2], length.out=nn)
polygon(c(rev(theta.CI),theta.CI), c(rep(0,nn), dbeta(theta.CI, y+a, N-y+b)),col="skyblue")
lines(c(theta.mode, theta.mode),c(0,dbeta(theta.mode, y+a, N-y+b)), lty=2)
points(theta.mean, 0, pch=4, col="red", cex=2)
data.frame(theta.mode, theta.mean, CI.LB=CI_2.res[1], CI.UB=CI_2.res[2])
theta.mode theta.mean CI.LB CI.UB
1 0.2736264 0.2784946 0.19 0.369