library(knitr)
library(rmarkdown)
library(kableExtra)
library(ggplot2)
library(ggfortify)
library(tidyverse)

TK-memo

  • WinBUGS and JAGS work well with discrete latent-variable models, whereas TMB and Stan allow only continuous latent variables, so at first glance they seem hard to apply to hidden Markov models. However, once the latent variables are marginalized out, both can be used without any problem. And, as one would expect, TMB is also dramatically faster.

  • Using TMB and Stan within the same project causes problems, so Stan is covered in a separate document.

1 Markov Modulated Poisson Process

The hidden Markov model (HMM) is a special case of the state-space model in which the state variable takes discrete values. The Markov modulated Poisson process (MMPP) is a special case of the HMM in which the observations follow a Poisson distribution.

1.1 Model definition

The model is defined in an intuitive way as follows:

Observation model: \[ Y_t|_{\mu_t} \sim Po(\mu_t) \]

State model:

\[\begin{equation} \mu_t |_{\mu_{t-1} = \lambda_1} = \left\{ \begin{array}{l} \lambda_1 \quad \mbox{with prob} \quad \theta_{11} \\ \lambda_2 \quad \mbox{with prob} \quad \theta_{12} \end{array} \right. \\ \mu_t |_{\mu_{t-1} = \lambda_2} = \left\{ \begin{array}{l} \lambda_1 \quad \mbox{with prob} \quad \theta_{21} \\ \lambda_2 \quad \mbox{with prob} \quad \theta_{22} \end{array} \right. \end{equation}\]

The model above can also be written using latent variables (\(z\)’s) as follows. This formulation is more convenient for WinBUGS or JAGS coding.

Observation model: \[ Y_t|_{z_t=k} \sim Po(\lambda_k) \quad (k=1,\dots,K) \]

State model: \[ z_t |_{z_{t-1}=j} \sim Cat((1,\dots,K), \theta_j=(\theta_{j1},\dots,\theta_{jK})), \quad \sum_{k=1}^K \theta_{jk}=1 \]

1.2 Generation of simulation data

Example data can be generated according to the model shown above.

# state-dependent Poisson means
lambda1 = 5
lambda2 = 10
# transition probabilities (row j holds the probabilities of moving from state j)
theta1 = c(0.9, 0.1)
theta2 = c(0.2, 0.8)
theta = rbind(theta1,theta2)

Ts = 100
Time = 1:Ts
y = numeric(Ts)
mu = numeric(Ts)

set.seed(2020)
rv = runif(Ts, 0,1)

# simulate the hidden mean process mu, then the Poisson observations y
mu[1] = sample(c(lambda1,lambda2), 1)
for(t in 2:Ts){ 
 u = rv[t]
 if(mu[t-1]==lambda1) mu[t] = (u<theta[1,1])*lambda1 + (u>=theta[1,1])*lambda2
 if(mu[t-1]==lambda2) mu[t] = (u<theta[2,1])*lambda1 + (u>=theta[2,1])*lambda2
}
y = rpois(Ts,mu)

plot(Time, y, type="l", ylab="Observation")
#abline(lambda1,0,lty=2,col="red")
#abline(lambda2,0,lty=2,col="red")
points(Time, mu, type="l", lty=2, lwd=1.5, col="red")
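As a quick sanity check (a small addition using only the objects defined above), the empirical transition frequencies of the simulated mean process can be compared with the rows of theta:

# empirical transition matrix (rows: from-state, columns: to-state)
state = ifelse(mu==lambda1, 1, 2)
prop.table(table(from=head(state,-1), to=tail(state,-1)), margin=1)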

Alternatively, the same data can be generated via the latent-variable formulation as follows. You can confirm that the two approaches give the same result.

Ks = 2
z = numeric(Ts)
set.seed(2020)
rv = runif(Ts, 0,1)

# simulate the latent state sequence z, then map it to the Poisson means
z[1] = sample(1:2, 1)
for(t in 2:Ts){ 
 u = rv[t]
 for(k in 1:Ks){
  if(z[t-1]==k) z[t] = (u<theta[k,1])*1 + (u>=theta[k,1])*2
 }
}
mu2 = c(lambda1,lambda2)[z]
y2 = rpois(Ts,mu2)

plot(Time, y2, type="l", ylab="Observation")
#abline(lambda1,0,lty=2,col="red")
#abline(lambda2,0,lty=2,col="red")
points(Time, mu2, type="l", lty=2, lwd=1.5, col="red")
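Because both simulations reset the seed and consume the random-number stream in the same order, the resulting sequences are identical, which can be checked directly:

identical(mu, mu2)
identical(y, y2)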

2 JAGS computation

2.1 Set up

#install.packages("rjags")
library(rjags)

2.2 Naive attempt

2.2.1 Definition of model in JAGS

Model_jags1 <-
"model {

  # hyper-parameters
  alpha[1] <- 0.5
  alpha[2] <- 0.5
  
  # priors
  for (k in 1:Ks) {
    theta[k] ~ dbeta(1,1)
    lambda[k] ~ dgamma(a,b)
  }
  
  # likelihood for latent variable z
  z[1] ~ dcat(alpha)
  for(t in 2:Ts) {
    z[t] ~ dcat(tmp[t,])
    tmp[t,1] <- (z[t-1]==1)*theta[1] + (z[t-1]==2)*theta[2]
    tmp[t,2] <- 1-tmp[t,1]
  }

  # likelihood for observation y
  for(t in 1:Ts) {
      y[t] ~ dpois(lambda[z[t]])
  }
  
}"
writeLines(Model_jags1, con="Model_jags1.txt")

2.2.2 MCMC setting

I intentionally used a large number of chains here, which makes the label switching in the results below easier to see.

#Data
data.jags <- list(Ts=Ts,Ks=Ks,y=y,a=0.001,b=0.001)

#Initial values
inits <- list(
  theta = runif(2,0,1),
  lambda = runif(2,0.01,20)
  )
#parameters <- c("theta","lambda","z")
parameters <- c("theta","lambda")

#MCMC settings
ni <- 20000
nb <- ni/2
nt <- 10
nc <- 10

2.2.3 MCMC sampling

Out.jags1 <- jags.model(
  file = "Model_jags1.txt",
  data = data.jags,
  inits = rep(list(inits), nc),
  n.chain = nc)
Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 100
   Unobserved stochastic nodes: 104
   Total graph size: 1005

Initializing model
update(Out.jags1, nb)
mcmc.jags1 <- coda.samples(
  Out.jags1,
  parameters,
  n.iter = ni-nb,
  thin = nt
)

2.2.4 Results

summary(mcmc.jags1)

Iterations = 11010:21000
Thinning interval = 10 
Number of chains = 10 
Sample size per chain = 1000 

1. Empirical mean and standard deviation for each variable,
   plus standard error of the mean:

            Mean     SD Naive SE Time-series SE
lambda[1] 4.9022 0.5319 0.005319       0.008861
lambda[2] 9.5387 1.1005 0.011005       0.017561
theta[1]  0.8217 0.1086 0.001086       0.001719
theta[2]  0.3398 0.1573 0.001573       0.002142

2. Quantiles for each variable:

            2.5%    25%    50%     75%   97.5%
lambda[1] 3.7987 4.5554 4.9354  5.2781  5.8527
lambda[2] 7.7276 8.7400 9.4210 10.2184 11.9842
theta[1]  0.5554 0.7675 0.8433  0.9000  0.9665
theta[2]  0.1006 0.2221 0.3161  0.4356  0.7045
plot(mcmc.jags1)

#pdf("plot_JAGS.pdf")
#plot(mcmc.jags1)
#dev.off()

You can see the so-called “label switching” in the trace plots: different chains (and sometimes different stretches of the same chain) swap the two states, so the samples for \(\lambda_1\) and \(\lambda_2\) exchange values.
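The switching also shows up in standard convergence diagnostics; for example, the Gelman–Rubin statistic (gelman.diag from the coda package, which rjags loads) is typically far above 1 when chains settle on different labelings:

gelman.diag(mcmc.jags1, multivariate=FALSE)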

2.3 Dealing with “label switching”

A quick treatment is to enforce the ordering \(\lambda_1 < \lambda_2\) through the reparameterization

\[ \lambda_2 = \lambda_1 + \Delta, \quad \Delta > 0, \]

where \(\Delta\) is given a gamma prior so that it remains positive.

2.3.1 Definition of model

Model_jags2 <-
"model {

  # hyper-parameters
  alpha[1] <- 0.5
  alpha[2] <- 0.5
  
  # priors
  for (k in 1:Ks) {
    theta[k] ~ dbeta(1,1)
  }
  
  lambda1 ~ dgamma(a,b)
  diff ~ dgamma(a,b)
  lambda[1] <- lambda1
  lambda[2] <- lambda1 + diff
  
  # likelihood for latent variable z
  z[1] ~ dcat(alpha)
  for(t in 2:Ts) {
    z[t] ~ dcat(tmp[t,])
    tmp[t,1] <- (z[t-1]==1)*theta[1] + (z[t-1]==2)*theta[2]
    tmp[t,2] <- 1-tmp[t,1]
  }

  # likelihood for observation y
  for(t in 1:Ts) {
      y[t] ~ dpois(lambda[z[t]])
  }
  
}"
writeLines(Model_jags2, con="Model_jags2.txt")

2.3.2 MCMC setting

#Initial values
inits <- list(
  theta = runif(2,0,1),
  lambda1 = runif(1,0.01,20),
  diff = runif(1,0.01,20)
  )
#parameters <- c("theta","lambda","z")
parameters <- c("theta","lambda1","diff","lambda")

#MCMC settings
ni <- 20000
nb <- ni/2
nt <- 10
nc <- 10

2.3.3 MCMC sampling

Out.jags2 <- jags.model(
  file = "Model_jags2.txt",
  data = data.jags,
  inits = rep(list(inits), nc),
  n.chain = nc)
Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 100
   Unobserved stochastic nodes: 104
   Total graph size: 1006

Initializing model
update(Out.jags2, nb)
mcmc.jags2 <- coda.samples(
  Out.jags2,
  parameters,
  n.iter = ni-nb,
  thin = nt
)
summary(mcmc.jags2)

Iterations = 11010:21000
Thinning interval = 10 
Number of chains = 10 
Sample size per chain = 1000 

1. Empirical mean and standard deviation for each variable,
   plus standard error of the mean:

            Mean     SD Naive SE Time-series SE
diff      4.5463 0.8888 0.008888       0.011405
lambda[1] 4.9213 0.5299 0.005299       0.009102
lambda[2] 9.4676 1.0768 0.010768       0.017364
lambda1   4.9213 0.5299 0.005299       0.009102
theta[1]  0.8211 0.1066 0.001066       0.001725
theta[2]  0.3352 0.1534 0.001534       0.002054

2. Quantiles for each variable:

            2.5%    25%    50%     75%   97.5%
diff      2.9669 3.9539 4.4816  5.0612  6.5089
lambda[1] 3.7865 4.5780 4.9489  5.3019  5.8676
lambda[2] 7.6983 8.7130 9.3453 10.1085 11.8657
lambda1   3.7865 4.5780 4.9489  5.3019  5.8676
theta[1]  0.5632 0.7655 0.8411  0.8982  0.9683
theta[2]  0.0977 0.2215 0.3142  0.4264  0.6880
plot(mcmc.jags2)
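With the ordering constraint in place, the trace plots should stay on a single labeling; this can be double-checked with the same diagnostic as before:

gelman.diag(mcmc.jags2, multivariate=FALSE)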

3 TMB

3.1 Set up

library(TMB)

3.2 Definition of model

The likelihood is obtained by marginalizing the latent states out of the joint distribution, which can be done efficiently with a forward recursion:

\[ \begin{array}{lcl} L &=& \displaystyle Pr(Y_t=y_t, Y_{t-1}=y_{t-1},\dots,Y_1=y_1) \\ &=& \displaystyle \sum_{k_1} \cdots \sum_{k_t} Pr(Y_t=y_t, Z_t=k_t | Z_{t-1}=k_{t-1}) \cdots Pr(Y_2=y_2, Z_2=k_2 | Z_1=k_1) \cdot Pr(Y_1=y_1, Z_1=k_1) \\ &=& \displaystyle \sum_{k_t} Pr(Y_t=y_t | Z_t=k_t) \left[ \sum_{k_{t-1}} Pr(Z_t=k_t | Z_{t-1}=k_{t-1}) \, Pr(Y_{t-1}=y_{t-1},\dots,Y_1=y_1, Z_{t-1}=k_{t-1}) \right] \\ &=& \displaystyle \sum_{k_t} dpois(y_t|\lambda_{k_t}) \left[ \sum_{k_{t-1}} \Theta_{k_{t-1},k_t} \, \Gamma_{t-1,k_{t-1}} \right] \\ &=& \cdots \end{array} \] where \[ \Theta_{j,k} = Pr(Z_t=k | Z_{t-1}=j), \qquad \Gamma_{t,k} = Pr(Y_t=y_t,\dots,Y_1=y_1, Z_t=k). \] Running the recursion \(\Gamma_{t,k} = dpois(y_t|\lambda_k) \sum_j \Theta_{j,k} \Gamma_{t-1,j}\) forward in time and summing over the state at the final time step gives the likelihood, \(L = \sum_k \Gamma_{t,k}\).
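Before moving to C++, here is a small R sketch of this forward recursion (an added illustration; it reuses the simulated data y and the true parameter values from Section 1.2). It computes the marginal log-likelihood on the log scale to avoid underflow:

# forward algorithm on the log scale
forward_loglik <- function(y, lambda, theta) {
  Ts <- length(y); Ks <- length(lambda)
  log_gam <- matrix(0, Ts, Ks)
  log_gam[1, ] <- dpois(y[1], lambda, log=TRUE)     # uniform initial weights
  for (t in 2:Ts) {
    for (k in 1:Ks) {
      acc <- log_gam[t-1, ] + log(theta[, k])       # all transitions into state k
      log_gam[t, k] <- dpois(y[t], lambda[k], log=TRUE) + log(sum(exp(acc)))
    }
  }
  log(sum(exp(log_gam[Ts, ])))                      # marginalize the final state
}
forward_loglik(y, c(lambda1, lambda2), theta)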

3.3 TMB coding

TMB_Model <- 
"#include <TMB.hpp>

template<class Type>
Type objective_function<Type>::operator()()
{
  DATA_INTEGER(Ts);
  DATA_INTEGER(Ks);
  DATA_VECTOR(y); 
  
  PARAMETER_VECTOR(log_lambda);
  PARAMETER_VECTOR(logit_theta);
  
  vector<Type> lambda(Ks);
  matrix<Type> theta(Ks,Ks);
  vector<Type> acc(Ks);
  matrix<Type> log_gam(Ts,Ks);
  
  for(int k=0;k<Ks;k++) lambda(k)=exp(log_lambda(k)); 
  for(int j=0;j<Ks;j++){
   theta(j,0)=1.0/(1+exp(-logit_theta(j))); 
   theta(j,1)=1-theta(j,0);
  }

  Type NLL;
  NLL=0.0; 
  
  // forward recursion on the log scale:
  // log_gam(t,k) = log Pr(y_0,...,y_t, Z_t = k)
  for(int k=0;k<Ks;k++) { 
    log_gam(0,k) = dpois(y(0),lambda(k),true); // uniform initial weights (constant omitted)
  }
  
  for(int t=1;t<Ts;t++){
   for(int k=0;k<Ks;k++){
    for(int j=0;j<Ks;j++){
     acc(j) = log_gam(t-1,j) + log(theta(j,k));
    }
    log_gam(t,k) = dpois(y(t),lambda(k),true) + log(exp(acc(0))+exp(acc(1)));
   }
  }
  
  // the likelihood sums over the state at the final time step only:
  // L = sum_k Gamma(Ts-1, k)
  NLL = -log(exp(log_gam(Ts-1,0))+exp(log_gam(Ts-1,1)));
  
  ADREPORT(lambda);
  ADREPORT(theta);
  
  return NLL;
  
}
"

writeLines(TMB_Model, con="mmpp.cpp")

compile("mmpp.cpp") 
[1] 0
dyn.load(dynlib("mmpp"))

3.4 TMB run

TMB.data <- list(Ts=Ts, Ks=Ks, y=y)
parameters <- list(log_lambda=log(c(5,10)), logit_theta=c(0,0)) 

obj <- MakeADFun(TMB.data, parameters, DLL="mmpp", inner.control=list(trace=T))
Constructing atomic D_lgamma
opt <-nlminb(obj$par, obj$fn, obj$gr, control=list(trace=0))
outer mgc:  1048.156 
outer mgc:  278.8088 
outer mgc:  104.2356 
outer mgc:  83.02394 
outer mgc:  91.16141 
outer mgc:  105.0223 
outer mgc:  361.2221 
outer mgc:  293.7457 
outer mgc:  214.0508 
outer mgc:  9.395882 
outer mgc:  21.50229 
outer mgc:  2.475015 
outer mgc:  10.47575 
outer mgc:  5.269842 
outer mgc:  3.76246 
outer mgc:  1.157089 
outer mgc:  0.008499924 
outer mgc:  0.001887102 
sdr <- sdreport(obj)
outer mgc:  0.001887102 
outer mgc:  7.755136 
outer mgc:  7.731613 
outer mgc:  11.32977 
outer mgc:  11.34609 
outer mgc:  0.7389029 
outer mgc:  0.7352069 
outer mgc:  0.801833 
outer mgc:  0.8022733 
outer mgc:  9.450017 
pl <- as.list(sdr,"Est"); pl
$log_lambda
[1] 1.602469 2.246017

$logit_theta
[1]  1.4406821 -0.8776274

attr(,"what")
[1] "Estimate"
plsd <- as.list(sdr,"Std"); plsd
$log_lambda
[1] 0.01959044 0.01532953

$logit_theta
[1] 0.1178012 0.1025536

attr(,"what")
[1] "Std. Error"
data.frame(Name=names(sdr$value), Est=sdr$value, SE=sdr$sd)
    Name       Est         SE
1 lambda 4.9652774 0.09727194
2 lambda 9.4500174 0.14486433
3  theta 0.8085603 0.01823452
4  theta 0.2936697 0.02127248
5  theta 0.1914397 0.01823452
6  theta 0.7063303 0.02127248
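For reference (a small added sketch, not part of the original output), approximate 95% Wald intervals on the natural scale can be obtained by back-transforming the log- and logit-scale estimates:

# back-transform point estimates and Wald limits to the natural scale
ci_lambda = exp(cbind(est = pl$log_lambda,
                      lwr = pl$log_lambda - 1.96*plsd$log_lambda,
                      upr = pl$log_lambda + 1.96*plsd$log_lambda))
ci_theta  = plogis(cbind(est = pl$logit_theta,
                         lwr = pl$logit_theta - 1.96*plsd$logit_theta,
                         upr = pl$logit_theta + 1.96*plsd$logit_theta))
ci_lambda
ci_theta

Here plogis is the inverse logit, so ci_theta refers to the probabilities \(\theta_{j1}\) of moving into state 1 from each state.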
dyn.unload(dynlib("mmpp"))
Warning: 5 external pointers will be removed