Lecture 4 ベイズ推定法 (2) 演習 -Poisson-
ベイズ法によるポアソン分布における点推定
尤度と事前分布の定義
データ \(y=(y_1,\dots,y_n) \sim (iid) Po(\lambda)\)
事前分布 \(\lambda \sim Ga(a,b)\)
\[\begin{equation} \displaystyle f(y|\lambda) = \prod_{i=1}^n e^{-\lambda} \frac{\displaystyle\lambda^{y_i}}{y_i!} = e^{-n \lambda} \frac{\displaystyle\lambda^{\sum_{i=1}^n y_i}}{\prod_{i=1}^n y_i!} \end{equation}\] \[\begin{equation} \displaystyle \pi(\lambda) = \frac{b^a}{\Gamma(a)} \lambda^{a-1} e^{-b\lambda}, \quad \mbox{where} \quad \Gamma(a) = \int_0^\infty x^{a-1} e^{-x} dx \end{equation}\] \[\begin{equation} \begin{array}{l} \displaystyle E[\lambda] = \frac{a}{b} \\ \displaystyle V[\lambda] = \frac{a}{b^2} \end{array} \end{equation}\]事前分布から事後分布への更新
事前分布 \(\lambda \sim Ga(a, b)\)
事後分布 \(\lambda|y \sim Ga(a+\sum_{i=1}^n y_i, b+n)\)
\[\begin{equation} \begin{array}{lcl} \pi(\lambda|y) &=& \displaystyle \frac{\displaystyle f(y|\lambda) \pi(\lambda)}{\displaystyle f(y)} = \displaystyle \frac{f(y|\lambda) \pi(\lambda)}{\displaystyle \int_0^\infty f(y|\lambda) \pi(\lambda) d\lambda} \\ &=& \displaystyle \frac{\displaystyle e^{-n\lambda} \frac{\displaystyle \lambda^{\sum_{i=1}^n y_i}} {\displaystyle \prod_{i=1}^n y_i!} \frac{b^a}{\Gamma(a)} \lambda^{a-1} e^{-b \lambda}} {\displaystyle \int_0^\infty e^{-n\lambda} \frac{\displaystyle \lambda^{\sum_{i=1}^n y_i}} {\displaystyle \prod_{i=1}^n y_i!} \frac{b^a}{\Gamma(a) b^a} \lambda^{a-1} e^{-b \lambda} d\lambda} \\ &=& \displaystyle \frac{(b+n)^{a+\sum_{i=1}^n y_i}} {\displaystyle \Gamma(a+\sum_{i=1}^n y_i)} \lambda^{a+\sum_{i=1}^n y_i-1} e^{-(b+n)\lambda} \end{array} \end{equation}\]Example Data: \(\lambda \sim Ga(1, 1)\) から事後分布への更新
#真のパラメータ
lambda.true <- 3
#データ数が少ないとき
n1 <- 5; y1 <-rpois(n1,lambda.true); y1sum <- sum(y1)
#データ数が大きいとき
n2 <- 20; y2<-rpois(n2,lambda.true); y2sum <- sum(y2)
#図を描くためにパラメータの範囲を設定
lambda <- seq(0.001,10,0.001)
a <- 1; b<-1
plot(lambda, dgamma(lambda, a, b), type="l", lty=1, ylab="pdf", xlim=c(0,10))
points(lambda, dgamma(lambda, a+y1sum, n1+b), type="l", lty=2)
points(lambda, dgamma(lambda, a+y2sum, n2+b), type="l", lty=3)
パラメータの推定値とその分散
\[\begin{equation} \begin{array} \displaystyle \hat{\lambda}_{mean} &=& \displaystyle E[\lambda|y] = \frac{a+\sum_{i=1}^n y_i}{b+n} \\ &=& \displaystyle \frac{b}{b+n} \times \frac{a}{b} + \frac{n}{b+n} \times \frac{\sum_{i=1}^n y_i}{n} \\ &=& \displaystyle \frac{b}{b+n} E[\lambda] + \frac{n}{b+n} \hat{\lambda}_{ML} \end{array} \end{equation}\]ベイズ推測法における区間推定
95% Credible Interval (grid HPDI)
CI_2 <- function(a, b, y, min, max, level, split=1000){
lambda <- seq(min, max,length.out=split+1)[-c(1,(split+1))]
PDF <- dgamma(lambda, a+sum(y), b+length(y))/split
clist <- seq(0.00001,max(PDF),length.out=100)
SS <- numeric(100)
for(i in 1:100){ SS[i] <- sum(PDF[PDF > clist[i]]) }
Num <- which.min(abs(SS-level))
INT <- lambda[PDF > clist[Num]]
CI <- c(min(INT), max(INT))
CI
}
CI_2.res <-CI_2(a=1, b=1, y=y1, min=0, max=10, level=0.95); CI_2.res
[1] 1.19 5.05
CI_2.res <-CI_2(a=1, b=1, y=y2, min=0, max=10, level=0.95); CI_2.res
[1] 2.87 5.51
Posterior and Credible Interval
#Data 1
y <- y1
ysum <- sum(y); n<-length(y)
lambda.mean <- (a+ysum)/(n+b)
CI_2.res <-CI_2(a=1, b=1, y=y, min=0, max=10, level=0.95); CI_2.res
[1] 1.19 5.05
lambda <- seq(0,10,0.001)
plot(lambda, dgamma(lambda, a+ysum, b+n), type="l", ylab="pdf")
nn <- 100
lambda.CI <- seq(from=CI_2.res[1], to=CI_2.res[2], length.out=nn)
polygon(c(rev(lambda.CI),lambda.CI), c(rep(0,nn), dgamma(lambda.CI, a+ysum, b+n)),col="skyblue")
points(lambda.mean, 0, pch=4, col="red", cex=2)
data.frame(lambda.mean, CI.LB=CI_2.res[1], CI.UB=CI_2.res[2])
lambda.mean CI.LB CI.UB
1 2.833333 1.19 5.05