library(knitr)
library(rmarkdown)
library(kableExtra)
library(ggplot2)
library(ggfortify)
library(tidyverse)
WinBugs and JAGS handle discrete latent-variable models naturally, whereas TMB and Stan allow only continuous latent variables, so at first glance they seem ill-suited to hidden Markov models. However, once the latent variables are marginalized out, both can be used without any problem, and, as one would expect, TMB is also dramatically faster.
Using TMB and Stan within the same project causes problems, so the Stan version is given in a separate document.
The hidden Markov model (HMM) is a special case of the state-space model in which the state variable takes discrete values. The Markov modulated Poisson process (MMPP) is a special case of the HMM in which the observations follow a Poisson distribution.
The model used here is first defined in an intuitive way as follows:
Observation model: \[ Y_t|_{\mu_t} \sim Po(\mu_t) \]
State model:
\[\begin{equation} \mu_t |_{\mu_{t-1} = \lambda_1} = \left\{ \begin{array}{l} \lambda_1 \quad \mbox{with prob} \quad \theta_{11} \\ \lambda_2 \quad \mbox{with prob} \quad \theta_{12} \end{array} \right. \\ \mu_t |_{\mu_{t-1} = \lambda_2} = \left\{ \begin{array}{l} \lambda_1 \quad \mbox{with prob} \quad \theta_{21} \\ \lambda_2 \quad \mbox{with prob} \quad \theta_{22} \end{array} \right. \end{equation}\]
The model above can also be written using latent variables (\(z\)’s) as follows. This formulation is more convenient for “WinBugs” or “JAGS” coding.
Observation model: \[ Y_t|_{z_t=k} \sim Po(\lambda_k) \quad (k=1,\dots,K) \]
State model: \[ z_t |_{z_{t-1}=j} \sim Cat((1,\dots,K), \theta_j=(\theta_{j1},\dots,\theta_{jK})), \quad \sum_{k=1}^K \theta_{jk}=1 \]
Example data can be generated according to the model shown above.
lambda1 = 5                   # Poisson mean in state 1
lambda2 = 10                  # Poisson mean in state 2
theta1 = c(0.9, 0.1)          # transition probabilities from state 1
theta2 = c(0.2, 0.8)          # transition probabilities from state 2
theta = rbind(theta1, theta2)
Ts = 100                      # number of time steps
Time = 1:Ts
y = numeric(Ts)
mu = numeric(Ts)
set.seed(2020)
rv = runif(Ts, 0, 1)
mu[1] = sample(c(lambda1, lambda2), 1)
for(t in 2:Ts){
  u = rv[t]
  if(mu[t-1]==lambda1) mu[t] = (u<theta[1,1])*lambda1 + (u>=theta[1,1])*lambda2
  if(mu[t-1]==lambda2) mu[t] = (u<theta[2,1])*lambda1 + (u>=theta[2,1])*lambda2
}
y = rpois(Ts, mu)
plot(Time, y, type="l", ylab="Observation")
#abline(lambda1,0,lty=2,col="red")
#abline(lambda2,0,lty=2,col="red")
points(Time, mu, type="l", lty=2, lwd=1.5, col="red")
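As a quick sanity check on the simulation, the long-run fraction of time the chain spends in each state should match the stationary distribution of the transition matrix theta. A minimal sketch (the stationary distribution is the left eigenvector of theta for eigenvalue 1):
# stationary distribution pi solves pi %*% theta = pi
ev <- eigen(t(theta))
pi_stat <- Re(ev$vectors[,1]) / sum(Re(ev$vectors[,1]))
pi_stat              # (2/3, 1/3) for the theta used here
mean(mu == lambda1)  # empirical fraction of time in state 1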
Alternatively, the same data can be generated using the latent-variable formulation as follows. You can confirm that both are exactly the same.
Ks = 2
lambda = c(lambda1, lambda2)  # state-specific Poisson means
z = numeric(Ts)
set.seed(2020)
rv = runif(Ts, 0, 1)
z[1] = sample(1:2, 1)
for(t in 2:Ts){
  u = rv[t]
  for(k in 1:Ks){
    if(z[t-1]==k) z[t] = (u<theta[k,1])*1 + (u>=theta[k,1])*2
  }
}
y = rpois(Ts, lambda[z])
plot(Time, y, type="l", ylab="Observation")
#abline(lambda1,0,lty=2,col="red")
#abline(lambda2,0,lty=2,col="red")
points(Time, lambda[z], type="l", lty=2, lwd=1.5, col="red")
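Since both blocks reset the seed to 2020 and consume the uniform draws in the same order, the two formulations produce exactly the same path. A quick check (assuming the first block has been run, so that mu is still available):
all(mu == lambda[z])  # TRUE: the latent states index the same rates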
#install.packages("rjags")
library(rjags)
Model_jags1 <-
"model {
# hyper-parameters
alpha[1] <- 0.5
alpha[2] <- 0.5
# priors
for (k in 1:Ks) {
theta[k] ~ dbeta(1,1)   # theta[k] = Pr(z[t]=1 | z[t-1]=k)
lambda[k] ~ dgamma(a,b)
}
# likelihood for latent variable z
z[1] ~ dcat(alpha)
for(t in 2:Ts) {
z[t] ~ dcat(tmp[t,])
tmp[t,1] <- (z[t-1]==1)*theta[1] + (z[t-1]==2)*theta[2]
tmp[t,2] <- 1-tmp[t,1]
}
# likelihood for observation y
for(t in 1:Ts) {
y[t] ~ dpois(lambda[z[t]])
}
}"
writeLines(Model_jags1, con="Model_jags1.txt")
I intentionally used a large number of chains here.
#Data
data.jags <- list(Ts=Ts,Ks=Ks,y=y,a=0.001,b=0.001)
#Initial values
inits <- list(
theta = runif(2,0,1),
lambda = runif(2,0.01,20)
)
#parameters <- c("theta","lambda","z")
parameters <- c("theta","lambda")
#MCMC settings
ni <- 20000   # total iterations
nb <- ni/2    # burn-in
nt <- 10      # thinning interval
nc <- 10      # number of chains
Out.jags1 <- jags.model(
file = "Model_jags1.txt",
data = data.jags,
inits = rep(list(inits), nc),
n.chain = nc)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 100
Unobserved stochastic nodes: 104
Total graph size: 1005
Initializing model
update(Out.jags1, nb)
mcmc.jags1 <- coda.samples(
Out.jags1,
parameters,
n.iter = ni-nb,
thin = nt
)
summary(mcmc.jags1)
Iterations = 11010:21000
Thinning interval = 10
Number of chains = 10
Sample size per chain = 1000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
lambda[1] 4.9022 0.5319 0.005319 0.008861
lambda[2] 9.5387 1.1005 0.011005 0.017561
theta[1] 0.8217 0.1086 0.001086 0.001719
theta[2] 0.3398 0.1573 0.001573 0.002142
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
lambda[1] 3.7987 4.5554 4.9354 5.2781 5.8527
lambda[2] 7.7276 8.7400 9.4210 10.2184 11.9842
theta[1] 0.5554 0.7675 0.8433 0.9000 0.9665
theta[2] 0.1006 0.2221 0.3161 0.4356 0.7045
plot(mcmc.jags1)
#pdf("plot_JAGS.pdf")
#plot(mcmc.jags1)
#dev.off()
In the trace plots you can see the so-called "label switching": different chains swap the roles of states 1 and 2, so the pooled marginal posteriors are mixtures of the two labelings.
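Because the two labelings are symmetric modes of the posterior, chains stuck in different modes inflate the between-chain variance. A quick numerical check (not run above) using coda's Gelman-Rubin diagnostic; values far above 1 for lambda and theta indicate the switching:
gelman.diag(mcmc.jags1)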
A quick treatment to enforce the ordering \(\lambda_1 < \lambda_2\) is the reparameterization
\[ \lambda_2 = \lambda_1 + \Delta, \quad \Delta > 0. \]
Model_jags2 <-
"model {
# hyper-parameters
alpha[1] <- 0.5
alpha[2] <- 0.5
# priors
for (k in 1:Ks) {
theta[k] ~ dbeta(1,1)
}
lambda1 ~ dgamma(a,b)
diff ~ dgamma(a,b)
lambda[1] <- lambda1
lambda[2] <- lambda1 + diff
# likelihood for latent variable z
z[1] ~ dcat(alpha)
for(t in 2:Ts) {
z[t] ~ dcat(tmp[t,])
tmp[t,1] <- (z[t-1]==1)*theta[1] + (z[t-1]==2)*theta[2]
tmp[t,2] <- 1-tmp[t,1]
}
# likelihood for observation y
for(t in 1:Ts) {
y[t] ~ dpois(lambda[z[t]])
}
}"
writeLines(Model_jags2, con="Model_jags2.txt")
#Initial values
inits <- list(
theta = runif(2,0,1),
lambda1 = runif(1,0.01,20),
diff = runif(1,0.01,20)
)
#parameters <- c("theta","lambda","z")
parameters <- c("theta","lambda1","diff","lambda")
#MCMC settings
ni <- 20000
nb <- ni/2
nt <- 10
nc <- 10
Out.jags2 <- jags.model(
file = "Model_jags2.txt",
data = data.jags,
inits = rep(list(inits), nc),
n.chain = nc)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 100
Unobserved stochastic nodes: 104
Total graph size: 1006
Initializing model
update(Out.jags2, nb)
mcmc.jags2 <- coda.samples(
Out.jags2,
parameters,
n.iter = ni-nb,
thin = nt
)
summary(mcmc.jags2)
Iterations = 11010:21000
Thinning interval = 10
Number of chains = 10
Sample size per chain = 1000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
diff 4.5463 0.8888 0.008888 0.011405
lambda[1] 4.9213 0.5299 0.005299 0.009102
lambda[2] 9.4676 1.0768 0.010768 0.017364
lambda1 4.9213 0.5299 0.005299 0.009102
theta[1] 0.8211 0.1066 0.001066 0.001725
theta[2] 0.3352 0.1534 0.001534 0.002054
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
diff 2.9669 3.9539 4.4816 5.0612 6.5089
lambda[1] 3.7865 4.5780 4.9489 5.3019 5.8676
lambda[2] 7.6983 8.7130 9.3453 10.1085 11.8657
lambda1 3.7865 4.5780 4.9489 5.3019 5.8676
theta[1] 0.5632 0.7655 0.8411 0.8982 0.9683
theta[2] 0.0977 0.2215 0.3142 0.4264 0.6880
plot(mcmc.jags2)
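With the ordering constraint in place the chains should agree on the labeling, and the same diagnostic should now be close to 1:
gelman.diag(mcmc.jags2)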
library(TMB)
The likelihood is obtained by marginalizing the latent states out of the joint distribution, which can be done recursively (the forward algorithm):
\[ \begin{array}{lcl} L &=& \displaystyle Pr(Y_t=y_t, Y_{t-1}=y_{t-1},\dots,Y_1=y_1) \\ &=& \displaystyle \sum_{k_1} \cdots \sum_{k_t} Pr(Y_t=y_t, Z_t=k_t | Z_{t-1}=k_{t-1}) \cdots Pr(Y_2=y_2, Z_2=k_2 | Z_1=k_1) \cdot Pr(Y_1=y_1, Z_1=k_1) \\ &=& \displaystyle \sum_{k_t} Pr(Y_t=y_t | Z_t=k_t) \left[ \sum_{k_{t-1}} Pr(Z_t=k_t | Z_{t-1}=k_{t-1}) Pr(Y_{t-1}=y_{t-1},\dots,Y_1=y_1, Z_{t-1}=k_{t-1}) \right] \\ &=& \displaystyle \sum_{k_t} dpois(y_t|\lambda_{k_t}) \left[ \sum_{k_{t-1}} \Theta_{k_{t-1},k_t} \Gamma_{t-1,k_{t-1}} \right] \\ &=& \cdots \end{array} \] where \[ \displaystyle \Theta_{j,k} = Pr(Z_t=k | Z_{t-1}=j), \\ \Gamma_{t,k} = Pr(Y_t=y_t,\dots,Y_1=y_1, Z_t=k). \] The forward variable \(\Gamma_{t,k}\) satisfies the recursion \(\Gamma_{t,k} = dpois(y_t|\lambda_k) \sum_j \Theta_{j,k} \Gamma_{t-1,j}\), and the likelihood is \(L = \sum_k \Gamma_{t,k}\) evaluated at the final time step.
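Before coding this in TMB, the recursion can be checked in a few lines of plain R. This is a minimal sketch (the name forward_nll and the log-sum-exp stabilization are introduced here for illustration, and it assumes K=2 as in the C++ code below); the initial state weights are left unnormalized at 1 to match the C++ code, which omits the constant from a uniform initial distribution:
forward_nll <- function(log_lambda, logit_theta, y, delta = c(1, 1)) {
  lambda <- exp(log_lambda)
  # Theta[j,k] = Pr(Z_t = k | Z_{t-1} = j); rows sum to 1
  Theta <- cbind(plogis(logit_theta), 1 - plogis(logit_theta))
  log_gam <- log(delta) + dpois(y[1], lambda, log = TRUE)  # log Gamma_{1,k}
  for (t in 2:length(y)) {
    # Gamma_{t,k} = dpois(y_t | lambda_k) * sum_j Theta[j,k] * Gamma_{t-1,j},
    # computed on the log scale with the log-sum-exp trick
    m <- max(log_gam)
    log_gam <- dpois(y[t], lambda, log = TRUE) +
      m + log(as.vector(exp(log_gam - m) %*% Theta))
  }
  m <- max(log_gam)
  -(m + log(sum(exp(log_gam - m))))  # -log sum_k Gamma_{T,k}
}
forward_nll(log(c(5, 10)), c(0, 0), y)
After MakeADFun below, obj$fn(obj$par) should reproduce this value.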
TMB_Model <-
"#include <TMB.hpp>
template<class Type>
Type objective_function<Type>::operator()()
{
  DATA_INTEGER(Ts);
  DATA_INTEGER(Ks);
  DATA_VECTOR(y);
  PARAMETER_VECTOR(log_lambda);
  PARAMETER_VECTOR(logit_theta);
  vector<Type> lambda(Ks);
  matrix<Type> theta(Ks,Ks);
  vector<Type> acc(Ks);
  matrix<Type> log_gam(Ts,Ks);  // log forward variables: log Gamma_{t,k}
  for(int k=0;k<Ks;k++) lambda(k)=exp(log_lambda(k));
  // theta(j,k) = Pr(Z_t=k+1 | Z_{t-1}=j+1); each row sums to 1
  for(int j=0;j<Ks;j++){
    theta(j,0)=1.0/(1+exp(-logit_theta(j)));
    theta(j,1)=1-theta(j,0);
  }
  // initialization; the uniform initial state distribution only adds a
  // constant to the likelihood and is omitted
  for(int k=0;k<Ks;k++) {
    log_gam(0,k) = dpois(y(0),lambda(k),true);
  }
  // forward recursion: Gamma_{t,k} = dpois(y_t|lambda_k) * sum_j theta(j,k)*Gamma_{t-1,j}
  for(int t=1;t<Ts;t++){
    for(int k=0;k<Ks;k++){
      for(int j=0;j<Ks;j++){
        acc(j) = log_gam(t-1,j) + log(theta(j,k));
      }
      log_gam(t,k) = dpois(y(t),lambda(k),true) + log(exp(acc(0))+exp(acc(1)));
    }
  }
  // the likelihood is the sum of the forward variables at the final time step
  Type NLL = -log(exp(log_gam(Ts-1,0))+exp(log_gam(Ts-1,1)));
  ADREPORT(lambda);
  ADREPORT(theta);
  return NLL;
}
"
writeLines(TMB_Model, con="mmpp.cpp")
compile("mmpp.cpp")
[1] 0
dyn.load(dynlib("mmpp"))
TMB.data <- list(Ts=Ts, Ks=Ks, y=y)
parameters <- list(log_lambda=log(c(5,10)), logit_theta=c(0,0))
obj <- MakeADFun(TMB.data, parameters, DLL="mmpp", inner.control=list(trace=T))
Constructing atomic D_lgamma
opt <-nlminb(obj$par, obj$fn, obj$gr, control=list(trace=0))
outer mgc: 1048.156
outer mgc: 278.8088
outer mgc: 104.2356
outer mgc: 83.02394
outer mgc: 91.16141
outer mgc: 105.0223
outer mgc: 361.2221
outer mgc: 293.7457
outer mgc: 214.0508
outer mgc: 9.395882
outer mgc: 21.50229
outer mgc: 2.475015
outer mgc: 10.47575
outer mgc: 5.269842
outer mgc: 3.76246
outer mgc: 1.157089
outer mgc: 0.008499924
outer mgc: 0.001887102
sdr <- sdreport(obj)
outer mgc: 0.001887102
outer mgc: 7.755136
outer mgc: 7.731613
outer mgc: 11.32977
outer mgc: 11.34609
outer mgc: 0.7389029
outer mgc: 0.7352069
outer mgc: 0.801833
outer mgc: 0.8022733
outer mgc: 9.450017
pl <- as.list(sdr,"Est"); pl
$log_lambda
[1] 1.602469 2.246017
$logit_theta
[1] 1.4406821 -0.8776274
attr(,"what")
[1] "Estimate"
plsd <- as.list(sdr,"Std"); plsd
$log_lambda
[1] 0.01959044 0.01532953
$logit_theta
[1] 0.1178012 0.1025536
attr(,"what")
[1] "Std. Error"
data.frame(Name=names(sdr$value), Est=sdr$value, SE=sdr$sd)
Name Est SE
1 lambda 4.9652774 0.09727194
2 lambda 9.4500174 0.14486433
3 theta 0.8085603 0.01823452
4 theta 0.2936697 0.02127248
5 theta 0.1914397 0.01823452
6 theta 0.7063303 0.02127248
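The ADREPORTed values are just the transformed fixed-effect estimates (with delta-method standard errors); as a check, the point estimates can be recovered from pl by hand:
exp(pl$log_lambda)      # lambda
plogis(pl$logit_theta)  # first column of theta, i.e. Pr(moving to state 1)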
dyn.unload(dynlib("mmpp"))
Warning: 5 external pointers will be removed