Rには統計解析やモデリングで利用する種々の確率分布に対応した関数が用意されている. これらは,各種の確率分布に従う乱数の発生の他,確率関数(確率密度関数),累積確率分布,そしてパーセント点の評価に分類される.例えば,2項分布\(Bin(N,p)\)を例にとると以下の通り.これと同様にして,頭文字を\(\verb/r, d, p, q/\)とすることで他の確率分布にも適用できる.
N <- 10
p <- 0.2
Yrange <- 0:10
Prob <- dbinom(Yrange, N, p)
Cum <- pbinom(Yrange, N, p)
par(mfrow=c(2,1))
plot(Yrange, Prob, type="h", lwd=5, xlab="y",
main="Prob function for Bin(10,0.2)")
plot(Yrange, Cum, type="l", lwd=2, xlab="y",
main="Cumulative prob fuction for Bin(10,0.2)", ylim=c(0,1))
Do by yourself!
plot(Yrange, Prob, type="h", lwd=5, xlab="y",
main=paste("2項分布 Bin(",N,",",p,") の確率関数"))
Notation
\(B_t\): 出産数\(P_t\)個のうち雌の数
1年あたりの妊娠率 b (=0.4) [雌1個体から1個体出産]
Stochastic nature
\[ \begin{array}{lcl} S_t|{N_t} &\sim& Bin(N_t, 1-d)\\ P_t|{S_t} &\sim& Bin(S_t, b)\\ B_t|{P_t} &\sim& Bin(P_t, 0.5)\\ N_{t+1} = S_t + B_t \end{array} \]
#パラメータ値の設定
d <- 0.2
b <- 0.4
TT <- 10
TT1 <- TT-1
Nsim <- 100
Nf <- array(0, c(Nsim, TT))
Nf[,1] <- 5
#1回だけの計算
for(t in 1:TT1) {
Surv <- rbinom(1, Nf[1, t], 1-d)
Preg <- rbinom(1, Surv, b)
Birthf <- rbinom(1, Preg, 0.5)
Nf[1, t+1] <- Surv + Birthf
}
Nf[1,]
[1] 5 2 3 3 3 4 3 3 2 2
plot(Nf[1,], type="l", col="skyblue", xlab="Year", ylab="#Female animals", ylim=c(0,10))
#計算の繰り返し
for(i in 1:Nsim){
for(t in 1:TT1){
Surv <- rbinom(1, Nf[i, t], 1-d)
Preg <- rbinom(1, Surv, b)
Birthf <- rbinom(1, Preg, 0.5)
Nf[i, t+1] <- Surv + Birthf
}
}
plot(Nf[1,], type="l", col="skyblue", ylim=c(0,max(Nf)),
xlab="Year", ylab="#Female animals", main=paste("TT=",TT))
for(i in 1:Nsim){
points(Nf[i,], type="l", col="skyblue")
}
10年後に絶滅する確率は?
paste("Extinction prob = ", mean(Nf[,TT] == 0))
[1] "Extinction prob = 0.17"
\(Y \sim Po(2.5)\) に対して \(Pr(Y=y)\) および\(P(Y<=y) (y=0,1,...,10)\) の計算
lambda <- 2.5
Yrange <- 0:10
Prob <- dpois(Yrange, lambda)
Cum <- ppois(Yrange, lambda)
par(mfrow=c(2,1))
plot(Yrange, Prob, type="h", lwd=5, xlab="y",
main="Prob func for Po(2.5)")
plot(Yrange, Cum, type="l", lwd=2, xlab="y",
main="Cumulative prob func for Po(2.5)", ylim=c(0,1))
\(Y \sim Po(5)\) に対して \(Pr(Y=y)\)および\(P(Y<=y) (y=0,1,...,20)\) の計算
Do by yourself!
\(Y \sim Po(2.5)\) と \(Y \sim Bin(50,0.05)\) の比較
Do by yourself!
d <- 0.2
b <- 0.4
lambda <- 0.2
TT <- 10; TT1<-TT-1
Nsim <- 100
Nf <- array(0, c(Nsim, TT))
Nf[,1] <- 5
for(i in 1:Nsim){
for(t in 1:TT1){
Surv <- rbinom(1, Nf[i, t], 1-d)
Preg <- rbinom(1, Surv, b)
if(Preg==0){Birthf <- 0} else {Birthf <- Preg + sum(rpois(Preg, lambda))}
Nf[i, t+1] <- Surv + Birthf
}
}
plot(Nf[1,], type="l", col="skyblue", xlab="Year", ylab="#Female animals",
ylim=c(0,max(Nf)), main=paste("TT=",TT))
for(i in 1:Nsim){
points(Nf[i,], type="l", col="skyblue")
}
paste("Extinction prob = ", mean(Nf[,TT] == 0))
[1] "Extinction prob = 0.02"
TT <- 20; TT1<-TT-1
Nf <- array(0, c(Nsim, TT))
Nf[,1] <- 5
for(i in 1:Nsim){
for(t in 1:TT1){
Surv <- rbinom(1, Nf[i, t], 1-d)
Preg <- rbinom(1, Surv, b)
if(Preg==0){Birthf <- 0} else {
Birth <- Preg + sum(rpois(Preg, lambda))
Birthf <- rbinom(1,Birth,0.5)
}
Nf[i, t+1] <- Surv + Birthf
}
}
plot(Nf[1,], type="l", col="skyblue", xlab="Year", ylab="#Female animals",
ylim=c(0,max(Nf)), main=paste("TT=",TT))
for(i in 1:Nsim){
points(Nf[i,], type="l", col="skyblue")
}
paste("Extinction prob = ", mean(Nf[,TT] == 0))
[1] "Extinction prob = 0.35"
\[ Y \sim U(a,b) \\ f(y) = \frac{1}{b-a} \quad \mbox{for } a < y < b \]
\[ X \sim U(0,1) \\ Y \sim U(0,1) \\ Pr(X^2+Y^2 \leq 1) = \pi/4 \]
Nit <- 100
x <- runif(Nit, 0, 1)
y <- runif(Nit, 0, 1)
plot(x, y)
curve( sqrt(1-x^2), xlim=c(0,1), add=T)
4*mean(x^2+y^2 <= 1)
[1] 3.16
関数化すると
Calc_pi <- function(Nit, Fig=F)
{
x <- runif(Nit, 0, 1)
y <- runif(Nit, 0, 1)
if(Fig==T){
plot(x, y, main=paste("#interations = ", Nit), pch=19, cex=0.6, col="gray")
curve( sqrt(1-x^2), xlim=c(0,1), add=T, col="red")
}
4*mean(x^2+y^2 <= 1)
}
par(mfrow=c(2,2))
Calc_pi(100, Fig=T)
[1] 3.4
Calc_pi(1000, Fig=T)
[1] 3.132
Calc_pi(10000, Fig=T)
[1] 3.152
Calc_pi(100000, Fig=T)
[1] 3.1408
Nit_vec <- seq(100,10000,100)
Pi_vec <- sapply(Nit_vec, Calc_pi)
plot(Nit_vec, Pi_vec, type="l", xlab="#iterations", ylab="Estiamte of pi")
abline(pi, 0, col="blue")
Nit_vec <- seq(100,100000,100)
Pi_vec <- sapply(Nit_vec, Calc_pi)
plot(Nit_vec, Pi_vec, type="l", xlab="#iterations", ylab="Estiamte of pi",
main="Nit is up to 10^5", ylim=c(3,3.3))
abline(pi, 0, col="blue")
Nit_vec <- seq(1000,1000000,1000)
Pi_vec <- sapply(Nit_vec, Calc_pi)
plot(Nit_vec, Pi_vec, type="l", xlab="#iterations", ylab="Estiamte of pi",
main="Nit is up to 10^6", ylim=c(3,3.3))
abline(pi, 0, col="blue")
3次関数 の最小化を通してRによる最適化法に慣れましょう!
#関数の定義の仕方(利用する関数function)
f <- function(x){x^3 + x^2 - 2*x }
#関数の描き方
curve(f, xlim=c(-3,3))
#関数の描き方(another one)
#x <- seq(-3,3,0.01)
#plot(x, f(x), type="l")
#関数の最小化(利用する関数optim)
#optim(初期値,関数名,最適化の方法(通常はBFGSニュートン法))
optim(-2, f, method="BFGS")
$par
[1] -2.612088e+21
$value
[1] -1.782228e+64
$counts
function gradient
6 6
$convergence
[1] 0
$message
NULL
optim(0, f, method="BFGS")
$par
[1] 0.5485839
$value
[1] -0.6311303
$counts
function gradient
12 6
$convergence
[1] 0
$message
NULL
optim(1, f, method="BFGS")
$par
[1] 0.5485839
$value
[1] -0.6311303
$counts
function gradient
11 6
$convergence
[1] 0
$message
NULL
#定義域を[0,1]に制限しての関数の最小化①
optim(-2, f, method="L-BFGS-B", lower=0, upper=1)
$par
[1] 0.5485836
$value
[1] -0.6311303
$counts
function gradient
7 7
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
#定義域を[0,1]に制限しての関数の最小化②(長くなるけど,こちらの方がお勧め)
f <- function(logitx){
x <- exp(logitx)/(1+exp(logitx))
# x <- 1/(1+exp(-logitx))
x^3 + x^2 - 2*x
}
res <- optim(-5, f, method="BFGS")
res$par
[1] 0.1949502
exp(res$par) /(1+exp(res$par))
[1] 0.5485838
2項分布に関するRの関数を覚え,計算に使えるようになりましょう!
★2項分布に関するRの関数 dbinom(観測値の値(y), 試行回数(N), 確率(p), log=T or F) rbinom(乱数の数,試行回数(N), 確率(p))
#Rによる確率の計算
N <- 10
p <- 0.3
dbinom(3, N, p)
[1] 0.2668279
y <- seq(0, N, 1)
prob <- dbinom(y, N, p)
prob
[1] 0.0282475249 0.1210608210 0.2334744405 0.2668279320 0.2001209490
[6] 0.1029193452 0.0367569090 0.0090016920 0.0014467005 0.0001377810
[11] 0.0000059049
plot(y, prob, type="h", lwd=3)
#Rによる乱数の生成
ns <- 20
yobs <- rbinom(ns, N, p)
yobs
[1] 0 1 3 1 3 3 4 4 2 1 2 2 1 1 4 1 3 4 3 1
mean(yobs)
[1] 2.2
var(yobs)
[1] 1.642105
sd(yobs)
[1] 1.281447
median(yobs)
[1] 2
#pに対する(負の)対数尤度関数の定義とグラフ
negloglike.bin <- function(p){
prob <- dbinom(yobs, N, p, log=T)
loglike <- sum(prob)
negloglike <- -loglike
negloglike
}
p.vec <- seq(0.01, 0.99, 0.01)
negLL <- sapply(p.vec, negloglike.bin)
plot(p.vec, negLL, type="l")
#★pに対する(負の)対数尤度関数の最小化①
res.bin <- optim(0.5, negloglike.bin, method="L-BFGS-B", lower=0.001, upper=0.999)
res.bin
$par
[1] 0.2200011
$value
[1] 32.51762
$counts
function gradient
8 8
$convergence
[1] 0
$message
[1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
p.est <- res.bin$par
p.est
[1] 0.2200011
mean(yobs)/N
[1] 0.22
#★(補足) pに対する(負の)対数尤度関数の最小化②
negloglike.bin <- function(par){
p <- 1/(1+exp(-par))
prob <- dbinom(yobs, N, p, log=T)
loglike <- sum(prob)
negloglike <- -loglike
negloglike
}
res.bin <- optim(0, negloglike.bin, method="BFGS")
p.est <- 1/(1+exp(-res.bin$par))
p.est
[1] 0.2199988