library(knitr)
library(rmarkdown)
library(kableExtra)
library(ggplot2)
library(ggfortify)
library(tidyverse)
Continued from Part I. Because using TMB and Stan within the same project causes problems, this is provided as a separate document.
WinBUGS and JAGS work well with discrete latent-variable models, whereas TMB and Stan allow only continuous latent variables, so at first glance they seem hard to apply to hidden Markov models. However, once the discrete latent states are marginalized out, both can be used without any problem. And, as one would expect, TMB is also dramatically faster.
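To make the marginalization concrete, here is a minimal R sketch of the forward algorithm that the Stan model below implements: the discrete state is summed out via log-sum-exp, so only the continuous parameters lambda and theta remain. The function name hmm_loglik and its interface are illustrative, not from any package.
hmm_loglik <- function(y, lambda, theta) {
  Ts <- length(y); Ks <- length(lambda)
  log_gamma <- matrix(NA_real_, Ts, Ks)   # log p(y[1:t], z[t] = k)
  log_gamma[1, ] <- dpois(y[1], lambda, log = TRUE)  # uniform initial state; constant dropped
  lse <- function(x) { m <- max(x); m + log(sum(exp(x - m))) }  # stable log-sum-exp
  for (t in 2:Ts) {
    for (k in 1:Ks) {
      log_gamma[t, k] <- lse(log_gamma[t - 1, ] + log(theta[, k])) +
        dpois(y[t], lambda[k], log = TRUE)
    }
  }
  lse(log_gamma[Ts, ])  # marginal log-likelihood p(y[1:Ts])
}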
We use the same data as in Part I (details omitted).
Example data can be generated according to the model shown above.
# True parameter values
lambda1 <- 5                    # Poisson mean in state 1
lambda2 <- 10                   # Poisson mean in state 2
theta1 <- c(0.9, 0.1)           # transition probabilities from state 1
theta2 <- c(0.2, 0.8)           # transition probabilities from state 2
theta <- rbind(theta1, theta2)  # transition matrix
Ts <- 100                       # length of the time series
Time <- 1:Ts
Ks <- 2                         # number of hidden states
z <- numeric(Ts)                # hidden state sequence
set.seed(2020)
rv <- runif(Ts, 0, 1)
z[1] <- sample(1:2, 1)          # initial state drawn uniformly
# Simulate the hidden Markov chain
for (t in 2:Ts) {
  u <- rv[t]
  for (k in 1:Ks) {
    if (z[t - 1] == k) z[t] <- (u < theta[k, 1]) * 1 + (u >= theta[k, 1]) * 2
  }
}
# State-dependent Poisson means and observations
mu <- lambda1 * (z == 1) + lambda2 * (z == 2)
y <- rpois(Ts, mu)
# Plot the observations with the true state-dependent means overlaid
plot(Time, y, type = "l", ylab = "Observation")
points(Time, mu, type = "l", lty = 2, lwd = 1.5, col = "red")
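As a quick sanity check (our addition, not part of the original analysis), the empirical transition frequencies of the simulated states should be close to theta:
trans <- table(head(z, -1), tail(z, -1))  # observed transition counts
prop.table(trans, margin = 1)             # row-normalized: compare with theta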
#install.packages("rstan")
library(rstan)
rstan_options(auto_write=TRUE)
options(mc.cores=parallel::detectCores())
library(bayesplot)
Stan_Model <-
"data{
  int Ts;            // length of the time series
  int Ks;            // number of hidden states
  int y[Ts];         // observed counts
  real a;            // shape of the gamma prior on lambda
  real b;            // rate of the gamma prior on lambda
  vector[Ks] alpha;  // Dirichlet prior parameter for each row of theta
}
parameters{
  positive_ordered[Ks] lambda;  // state means, ordered to prevent label switching
  simplex[Ks] theta[Ks];        // transition matrix, one simplex per row
}
transformed parameters{
  // Forward algorithm: log_gamma[t,k] = log p(y[1:t], z[t] = k)
  real acc[Ks];
  matrix[Ts,Ks] log_gamma;
  // t = 1: uniform initial state distribution (constant term omitted)
  for(k in 1:Ks){
    log_gamma[1,k] = poisson_lpmf(y[1] | lambda[k]);
  }
  for(t in 2:Ts){
    for(k in 1:Ks){
      for(j in 1:Ks){
        acc[j] = log_gamma[t-1,j] + log(theta[j,k]) + poisson_lpmf(y[t] | lambda[k]);
      }
      log_gamma[t,k] = log_sum_exp(acc);
    }
  }
}
model{
  // Priors
  for(k in 1:Ks){
    theta[k] ~ dirichlet(alpha);
    lambda[k] ~ gamma(a,b);
  }
  // Likelihood: the latent states are marginalized out, so only the
  // forward variables at the final time step enter the target
  target += log_sum_exp(log_gamma[Ts]);
}
"
writeLines(Stan_Model, con="Stan_Model.stan")
Stan.data <- list(Ts=Ts, Ks=Ks, y=y, a=1, b=1, alpha=c(1,1))
Res_Stan <- stan(
  file = "Stan_Model.stan",
  data = Stan.data,
  pars = c("lambda", "theta"),
  init = "random",
  seed = 2020,
  chains = 3,
  iter = 15000,
  warmup = 5000,
  thin = 10,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)
print(Res_Stan,probs=c(0.025, 0.5, 0.975))
Inference for Stan model: Stan_Model.
3 chains, each with iter=15000; warmup=5000; thin=10;
post-warmup draws per chain=1000, total post-warmup draws=3000.
mean se_mean sd 2.5% 50% 97.5% n_eff Rhat
lambda[1] 4.94 0.00 0.10 4.75 4.95 5.13 2705 1
lambda[2] 9.42 0.00 0.15 9.14 9.42 9.71 3237 1
theta[1,1] 0.80 0.00 0.02 0.77 0.80 0.84 2817 1
theta[1,2] 0.20 0.00 0.02 0.16 0.20 0.23 2817 1
theta[2,1] 0.29 0.00 0.02 0.25 0.29 0.34 3014 1
theta[2,2] 0.71 0.00 0.02 0.66 0.71 0.75 3014 1
lp__ -13240.91 0.03 1.41 -13244.52 -13240.58 -13239.14 2997 1
Samples were drawn using NUTS(diag_e) at Fri Apr 17 11:18:31 2020.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
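These diagnostics can also be inspected visually; a sketch using rstan's summary matrix and bayesplot's diagnostic plots, with the 3000 total post-warmup draws from the run above:
fit_summary <- summary(Res_Stan)$summary  # matrix with Rhat and n_eff columns
mcmc_rhat(fit_summary[, "Rhat"])          # all Rhat values should sit near 1
mcmc_neff(fit_summary[, "n_eff"] / 3000)  # n_eff as a fraction of total draws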
stan_trace(Res_Stan, pars=c("lambda","theta[1,1]","theta[2,1]"))
#stan_dens(Res_Stan, fill="skyblue", pars=c("lambda"))
#stan_dens(Res_Stan, fill="skyblue", pars=c("theta[1,1]","theta[2,1]"))
stan_hist(Res_Stan, fill="skyblue", pars=c("lambda","theta[1,1]","theta[2,1]"))
mcmc_sample_stan <- rstan::extract(Res_Stan, permuted=FALSE)
mcmc_combo(mcmc_sample_stan, pars=c("lambda[1]","lambda[2]"))
mcmc_combo(mcmc_sample_stan, pars=c("theta[1,1]","theta[2,1]"))
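Because the latent states were marginalized out, they are never sampled directly, but the most likely state sequence can still be recovered afterwards. Below is a sketch of Viterbi decoding evaluated at the posterior medians (our addition; the original stops at parameter estimation):
post <- rstan::extract(Res_Stan)
lambda_hat <- apply(post$lambda, 2, median)       # posterior medians of lambda
theta_hat  <- apply(post$theta, c(2, 3), median)  # posterior medians of theta
best_lp <- matrix(-Inf, Ts, Ks)  # best log-probability of a path ending in state k at time t
back    <- matrix(1L, Ts, Ks)    # backpointers to the best previous state
best_lp[1, ] <- dpois(y[1], lambda_hat, log = TRUE)
for (t in 2:Ts) {
  for (k in 1:Ks) {
    cand <- best_lp[t - 1, ] + log(theta_hat[, k])
    back[t, k]    <- which.max(cand)
    best_lp[t, k] <- max(cand) + dpois(y[t], lambda_hat[k], log = TRUE)
  }
}
z_hat <- integer(Ts)
z_hat[Ts] <- which.max(best_lp[Ts, ])
for (t in (Ts - 1):1) z_hat[t] <- back[t + 1, z_hat[t + 1]]
table(z_hat, z)  # decoded states vs. the true simulated states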