# msbvar() and related functions
# Patrick T. Brandt
# 20081113 : Initial version
# 20120110 : Updated to use new block optimization function by Ryan
#            Davis.  This implements the SWZ block algorithm to get
#            the MLE of the MSVAR and then sets this up for Gibbs
#            sampler.
# 20120112 : Updated to setup some smart starting values and allow the
#            users to input them.  Some error checking is included as
#            well.  Also users multiple random and user input methods
#            to generate starting values for MCMC

# Workhorse msbvar() function with SZ prior to initialize the posterior
# of the Gibbs sampler for the MSBVAR models.

msbvar <- function(Y, z=NULL, p, h,
                   lambda0, lambda1, lambda3,
                   lambda4, lambda5, mu5, mu6, qm,
                   alpha.prior=100*diag(h) + matrix(2, h, h),
                   prior=0, max.iter=40, initialize.opt=NULL)
{
    # Get the number of equations
    m <- ncol(Y)

    # This should be part of a sanity.check.msbvar
    if(h==1)
    {
        stop("\n\n\t -- For MSBVAR models, h>1.  Otherwise, just for a BVAR or VAR!\n")
    }

    ########################################################
    # Before the loop this is all initialization for the EM
    # implementation of the Bayesian model.
    ########################################################

    # As default, do a baseline, non-regime model using szbvar() since
    # this gives us all of the inputs we need for later.  User can
    # input their own function / object to do something different as
    # long as it conforms.  See docs for details.

    if(is.null(initialize.opt)==TRUE)  # User provided nothing
    {
        # Inherits all from inputs.
        setup <- initialize.msbvar(Y, p, z, lambda0, lambda1,
                                   lambda3, lambda4, lambda5, mu5,
                                   mu6, nu=m, qm, prior, h,
                                   Q=NULL)

        init.model <- setup$init.model
        Qhat.start <- setup$Qhat.start
        thetahat.start <- setup$thetahat.start
    } else {
    # This is the user input one, so we need to validate it
        if(inherits(initialize.opt$init.model, "BVAR")==FALSE)
        {
            stop("msbvar() initialize.opt list must have an object named init.model of class BVAR.  Create this using szbvar()\n")
        }

        # Check dim of thetahat.start
        tmp <- dim(initialize.opt$thetahat.start)
        if(tmp[1]!=m)
        {
            stop("initialize.opt$thetahat.start has the wrong number of rows\n")
        }
        if(tmp[2]!=(1+m*p+m))
        {
            stop("initialize.opt$thetahat.start has the wrong number of columns\n")
        }
        if(tmp[3]!=h)
        {
            stop("initialize.opt$thetahat.start has the wrong array dimension\n")
        }

       # Validate an initial Q from the user

        if(sum(initialize.opt$Qhat.start)!=h)
        {
            stop("msbvar(): Improper initial Q, transition matrix, for the MS process.  Rows must sum to 1.\n")
        }

        # If we get to here, the input object is valid.  So assign for
        # use
        init.model <- initialize.opt$init.model
        Qhat.start <- initialize.opt$Qhat.start
        thetahat.start <- initialize.opt$thetahat.start

        # Check dim of alpha.prior
        tmp <- dim(alpha.prior)
        if(tmp[1]!=h)
        { stop("msbvar(): Incorrect number of rows in alpha.prior") }

        if(tmp[2]!=h)
        { stop("msbvar(): Incorrect number of columns in alpha.prior")
        }

    }  # End validations of user inputs

    #################################################################
    # EM-blkopt algorithm -- all handled in the Fortran code
    #################################################################

    indms <- 'IAH'   # Right now, user does not have choice over what
                     # switches.  Allow I = Intercepts, A = AR, H =
                     # Variances.

    mlemod <- blkopt(Y, p, thetahat.start, Qhat.start, niter=max.iter,
                     indms)

    # Do setup of the object we want to feed into the posterior
    # sampler, gibbs.msbvar().  This is basically a final pass at
    # setting up the moment matrices for the regressions with the
    # appropriate priors.

    hreg <- setupGibbs(mlemod, Y, init.model)

    # Now re-assign the objects and set up the output

    output <- list(init.model=init.model,
                   hreg=hreg,
                   Q=mlemod$Qhat,
                   fp=mlemod$fpH,
                   m=m, p=p, h=h,
                   alpha.prior=alpha.prior)

    class(output) <- c("MSVARsetup")
    attr(output, "eqnames") <- colnames(Y) # Get variable names for
                                           # attr
    return(output)
}

############################################################
# blkopt() -- the block optimizer for MSBVAR and MS models
############################################################

blkopt <- function(Y, p, thetahat.start, Qhat.start, niter=10, indms) {

    m <- ncol(Y)
    n <- nrow(Y) # later, definition of n will change
    h <- nrow(Qhat.start)

  ##################################################
  ### Setup Regression Matrices
  ##################################################

  # setup LHS Y as Yregmat (Y regression matrix)
    Yregmat <- matrix(Y[(p+1):n,], ncol=m)

  # setup lagged Y's on RHS for AR coefficients, which we define as
  # Xregmat = [Y_t-1, Y_t-2, ..., Y_t-p] (X regression matrix)
  # hence, we will have n-p observations
    Xregmat <- NULL
    if (p>0) { for (i in 1:p) { Xregmat <- cbind(Xregmat, Y[(p-i+1):(n-i),]) } }
    Xregmat <- cbind(rep(1,n-p), Xregmat)  # add intercept on front


  ##################################################
  ### Initialize
  ##################################################

    beta0.it <- array(NA, c(m,1,h))
    beta0.it[,,] <- thetahat.start[,1,]

    betap.it <- NULL
    if (p>0) {
    # rows correspond to equation
    # columns ordered by lag order, then each variable
        betap.it <- array(thetahat.start[,2:(1+p*m),], c(m,m*p,h))
    }

  sig2.it  <- array(thetahat.start[,(1+m*p+1):ncol(thetahat.start),], c(m,m,h))

  theta.it <- array(NA, c(m,1+m*p+m,h))
  theta.it[,1,] <- beta0.it
  if (p>0) theta.it[,2:(1+m*p),] <- betap.it
  theta.it[,(1+m*p+1):ncol(theta.it),] <- sig2.it

  Qhat.it  <- Qhat.start
  # Qhat is in last h columns

  # add one to store llfval for starting values
  llfval <- rep(0,niter+1)

  # run filter to get LLF value for starting values

  # First, obtain residuals
  # e[,,i] is an (n-p) x m matrix containing conditional means
  # for regime i, i=1,2,...,h (e's third dimension is regime)
  e <- array(NA, c(n-p, m, h))
  betahat <- array(NA, c(m,1+m*p,h))
  betahat[,1,] <- beta0.it
  if (p>0) betahat[,2:(1+m*p),] <- betap.it
  # adjust for univariate vs. multivariate case
  if (m>1) {
    for (i in 1:h) { e[,,i] <- Yregmat - Xregmat %*% t(betahat[,,i]) }
  } else {
    for (i in 1:h) { e[,,i] <- Yregmat - Xregmat %*% betahat[,,i] }
  }

  # Using residuals, get filtered regime probabilities
  # HamFilt <- filter.Hamresid(e, sig2.it, Qhat.it)  # R code
  firstHamFilt <- .Fortran("HamiltonFilter",
                           bigt=as.integer(n),
                           m = as.integer(m), p = as.integer(p),
                           h = as.integer(h),
                           e = e,
                           sig2 = sig2.it,
                           Qhat = Qhat.it,
                           f = double(1),
                           filtprSt =
                           matrix(0,as.integer(n-p),as.integer(h))
                           )

  llfval[1] <- firstHamFilt$f

  cat('Initial Log Likelihood Value:', llfval[1] ,'\n', sep=" ")

  ##################################################
  ### Begin Blockwise Optimization
  ##################################################

  cat('Starting blockwise optimization over', niter, 'block optimizations...\n')

  # blocks: intercept(s), AR coefficient(s), variance/Sigma
  for (iter in 1:niter) {
    # intercepts
    beta0.it <- optim(par=c(beta0.it), fn=llf.msar, Y=Yregmat, X=Xregmat, p=p,
                      theta=theta.it, Q=Qhat.it, optstr='beta0',
                      ms.switch=indms, method="BFGS")$par

    if (length(grep('I', indms)) == 0) beta0.it <- array(beta0.it[1:m], c(m,1,h))

    theta.it[,1,] <- array(beta0.it, c(m,1,h))

    # AR coefficients
    if (p>0) {
      betap.it <- optim(par=c(betap.it), fn=llf.msar, Y=Yregmat,
                        X=Xregmat, p=p,
                        theta=theta.it, Q=Qhat.it, optstr='betap',
                        ms.switch=indms, method="BFGS")$par

      # need to check below line...
      if (length(grep('A', indms)) == 0) betap.it <- array(betap.it[1:(m*p)], c(m,m*p,h))
      theta.it[,2:(1+m*p),] <- array(betap.it, c(m,m*p,h))
    }

    # variance/Sigma
    if (m==1) {
      # AR (univariate)
      sig2.it <- optim(par=c(sig2.it), fn=llf.msar, Y=Yregmat, X=Xregmat, p=p,
                       theta=theta.it, Q=Qhat.it, optstr='sig2',
                       ms.switch=indms, method="BFGS")$par

      if (length(grep('H', indms)) == 0) { sig2.it <- array(sig2.it[1], c(m,m,h)) }  # adjust if variance does not switch
      sig2.it <- array(sig2.it, c(m,m,h))
    } else {
      # VAR, only optimize over distinct elements of Sigma for each regime
      sig2.lower <- sig2.it[,,][lower.tri(sig2.it[,,1], diag=TRUE)]
      sig2.lower <- optim(par=c(sig2.lower), fn=llf.msar, Y=Yregmat,
                          X=Xregmat, p=p,
                          theta=theta.it, Q=Qhat.it, optstr='sig2',
                          ms.switch=indms, method="BFGS")$par

      sig2.it  <- array(NA, c(m,m,h))
      # number of distinct: m*(m+1)/2
      nd <- (m*(m+1)/2)
      for (i in 1:h) {
        low <- sig2.lower[(1+(i-1)*nd):(nd+(i-1)*nd)]
        sig2.it[,,i] <- xpnd(low, nrow=m)  # user-defined function below
      }
      if (length(grep('H', indms)) == 0) { sig2.it <- array(sig2.it[,,1], c(m,m,h)) }  # only want first returned value
    }
    theta.it[,(1+m*p+1):ncol(theta.it),] <- sig2.it

    # transition matrix
    Qhat.it <- optim(par=c(Qhat.it[,1:(h-1)]), fn=llf.msar, Y=Yregmat,
                     X=Xregmat, p=p,
                     theta=theta.it, Q=Qhat.it, optstr='Qhat',
                     ms.switch=indms, method="BFGS")$par

    Qhat.it <- matrix(Qhat.it, nrow=h, ncol=h-1)
    Qhat.it <- cbind(Qhat.it, 1-rowSums(Qhat.it))

    # obtain value of likelihood function and store it
    llfval[iter+1] <- -llf.msar(param.opt=beta0.it, Y=Yregmat,
                                X=Xregmat, p=p,
                                theta=theta.it, Q=Qhat.it,
                                optstr='beta0', ms.switch='TOTAL')

    cat('Completed iteration ', iter, ' of ', niter,
        '. Log Likelihood Value: ',
        llfval[iter+1], '\n', sep="")

  }

  # run filter one last time using final parameter estimates

  # First, obtain residuals
  # e[,,i] is an (n-p) x m matrix containing conditional means
  # for regime i, i=1,2,...,h (e's third dimension is regime)
  e <- array(NA, c(n-p, m, h))
  betahat <- array(NA, c(m,1+m*p,h))
  betahat[,1,] <- beta0.it
  if (p>0) betahat[,2:(1+m*p),] <- betap.it
  # adjust for univariate vs. multivariate case
  if (m>1) {
    for (i in 1:h) { e[,,i] <- Yregmat - Xregmat %*% t(betahat[,,i]) }
  } else {
    for (i in 1:h) { e[,,i] <- Yregmat - Xregmat %*% betahat[,,i] }
  }

  # Using residuals, get filtered regime probabilities
  # HamFilt <- filter.Hamresid(e, sig2.it, Qhat.it)  # R code
  lastHamFilt <- .Fortran("HamiltonFilter",
         bigt=as.integer(n),
         m = as.integer(m), p = as.integer(p), h = as.integer(h),
         e = e,
         sig2 = sig2.it,
         Qhat = Qhat.it,
         f = double(1),
         filtprSt = matrix(0,as.integer(n-p),as.integer(h))
         )

  fpH <- lastHamFilt$filtprSt

  output <- list(theta=theta.it,
                 Qhat=Qhat.it,
                 llfval=llfval,
                 fpH=fpH,
                 m=m, p=p, h=h)

  class(output) <- c('blkopt')

  return(output)

}


llf.msar <- function(param.opt, Y, X, p, theta, Q, optstr, ms.switch) {

  m <- ncol(Y)
  n <- nrow(Y) + p
  h <- nrow(Q)

  # initially assign values from theta
  beta0 <- array(theta[,1,], c(m,1,h))
  betap <- NULL
  if (p > 0) betap <- array(theta[,2:(1+m*p),], c(m,m*p,h))
  sig2  <- array(theta[,(1+m*p+1):ncol(theta),], c(m,m,h))
  Qhat  <- Q

  # now choose the parameter over which we are optimizing
  if (optstr=='beta0') {
    beta0 <- array(param.opt, c(m,1,h))
  } else if (optstr=='betap') {
    if (p > 0) betap <- array(param.opt, c(m,m*p,h))
  } else if (optstr=='sig2') {
      sig2  <- array(NA, c(m,m,h))
      # number of distinct: m*(m+1)/2
      nd <- (m*(m+1)/2)
      for (i in 1:h) {
        low <- param.opt[(1+(i-1)*nd):(nd+(i-1)*nd)]
        sig2[,,i] <- xpnd(low, nrow=m)  # user-defined function below
      }
  } else if (optstr=='Qhat') {
    # only passing in first h-1 columns, so add column
    Qhat <- matrix(param.opt, nrow=h, ncol=h-1)
    Qhat <- cbind(Qhat, 1-rowSums(Qhat))
  }

  # numerical checks on Q matrix
  # prevents elements in Q from going negative or greater than 1
  # if that occurs during optimization, then just set to previous Q
  if ( (min(Qhat) <= 0.0001) || (max(Qhat) >= 0.9999 )) Qhat <- Q

  # numerical checks on sig2
  # prevents elements in sig2 from going negative
  # if that occurs during optimization, then just set to previous sig2
  if ( (min(sig2) <= 0.0001) ) sig2 <- array(theta[,(1+m*p+1):ncol(theta),], c(m,m,h))
  # constrain max(off-diagonal) to be less than min(diagonal)
  blnUseOld <- FALSE
  if (m>1) {
    blnSetPast = 0
    for (i in 1:h) {
      if (max(sig2[,,i][lower.tri(sig2[,,i], diag=FALSE)]) > min(diag(sig2[,,i]))) blnSetPast = 1
    }
    if (blnSetPast == 1) blnUseOld <- TRUE
  }
  if (blnUseOld==TRUE) sig2 <- array(theta[,(1+m*p+1):ncol(theta),], c(m,m,h))

  # by default, everything switches, so now adjust by
  # assigning values that do not switch to first state
  # intercept only
  if (ms.switch=='I') {
    if (p > 0) betap <- array(betap[,,1], c(m,m*p,h))
    sig2  <- array(sig2[,,1], c(m,m,h))
  } else if (ms.switch=='H') { # heteroskedastic
    beta0 <- array(beta0[,,1], c(m,1,h))
    if (p > 0) betap <- array(betap[,,1], c(m,m*p,h))
  } else if (ms.switch=='A') {
    beta0 <- array(beta0[,,1], c(m,1,h))
    sig2  <- array(sig2[,,1], c(m,m,h))
  } else if (ms.switch=='IA') { # homoskedastic
    sig2  <- array(sig2[,,1], c(m,m,h))
  } else if (ms.switch=='IH') {
    if (p > 0) betap <- array(betap[,,1], c(m,m*p,h))
  } else if (ms.switch=='AH') {
    beta0 <- array(beta0[,,1], c(m,1,h))
  }


  #############################################
  # Filtering section
  # Note: there is both Fortran and native R
  #       code in the package (they parallel).
  # Use Fortran for speed, but R for
  # pedagogical purposes.
  #############################################

  # First, obtain residuals
  # e[,,i] is an (n-p) x m matrix containing conditional means
  # for regime i, i=1,2,...,h (e's third dimension is regime)
  e <- array(NA, c(n-p, m, h))
  betahat <- array(NA, c(m,1+m*p,h))
  betahat[,1,] <- beta0
  if (p>0) betahat[,2:(1+m*p),] <- betap
  # adjust for univariate vs. multivariate case
  if (m>1) {
    for (i in 1:h) { e[,,i] <- Y - X %*% t(betahat[,,i]) }
  } else {
    for (i in 1:h) { e[,,i] <- Y - X %*% betahat[,,i] }
  }

  # Using residuals, get filtered regime probabilities
  # HamFilt <- filter.Hamresid(e, sig2.it, Qhat.it)  # R code
  HamFilt <- .Fortran("HamiltonFilter",
         bigt=as.integer(n),
         m = as.integer(m), p = as.integer(p), h = as.integer(h),
         e = e,
         sig2 = sig2,
         Qhat = Qhat,
         f = double(1),
         filtprSt = matrix(0,as.integer(n-p),as.integer(h))
         )

  f <- HamFilt$f

  return(-f) # optim() minimizes negative

}


# This needs to be implemented in C // C++ code later
BHLK.smoother <- function(fp, Q)
{
    TT <- nrow(fp)
    h <- ncol(fp)
    p.smooth <- matrix(0, TT, h)

    p.smooth[TT,] <- fp[TT,]

    for(tt in (TT-1):1)
    {
        p.predict <- Q%*%fp[tt,]
        p.smooth[tt,] <- crossprod(Q, (p.smooth[tt+1,]/p.predict))*fp[tt,]
    }

    return(p.smooth)
}

hregime.setup <- function(h, m, p, TT)
{

    Bk <- array(0, c(m*p+1, m, h))
    Sigmak <- array(0, c(m,m,h))
    df <- matrix(0, 1, h)
    e <- array(0, c(TT, m, h))
    return(list(Bk=Bk, Sigmak=Sigmak, df=df, e=e))
}


setupGibbs <- function(blkoptobj, Y, init.model)
{

  n <- nrow(Y)
  h <- blkoptobj$h
  m <- blkoptobj$m
  p <- blkoptobj$p

  # now, setup hreg, adjusting for dummies
  hreg <- hregime.reg2.mle(h, m, p, TT=(n-p), fp=blkoptobj$fpH, init.model)


  return(hreg)
}

# Ryan adjusted several items from the original hregime.reg2 function
hregime.reg2.mle <- function(h, m, p, TT, fp, init.model)
{

    # Storage
    tmp <- vector(mode="list", length=h)
    Bk <- array(0, c(m*p+1, m, h))
    Sigmak <- array(0, c(m,m,h))
    df <- apply(fp, 2, sum)
    e <- array(0, c(TT, m, h))
    Y <- init.model$Y[(m+1+1):nrow(init.model$Y),]
    X <- init.model$X[(m+1+1):nrow(init.model$X),]

    # Loops to compute
    # 1) sums of squares for X and Y
    # 2) B(k) matrices
    # 3) Residuals
    # 4) Sigma(k) matrices

    for(i in 1:h)
    {
        # Note how the dummy obs. are appended to the moment matrices
        Sxy <- crossprod(X, diag(fp[,i]))%*%Y + crossprod(init.model$X[1:(m+1),], init.model$Y[1:(m+1),])
        Sxx <- crossprod(X, diag(fp[,i]))%*%X + crossprod(init.model$X[1:(m+1),])

        # Compute the regression coefficients
        hstar <- Sxx
        Bk[,,i] <- solve(hstar,Sxy,tol=1e-100)

        # Compute residuals and Sigma (based on Krolzig)

        # Get the full residuals -- need these for filtering
        e[,,i] <- Y - X%*%Bk[,,i]

        Sigmak[,,i] <- (crossprod(e[,,i],diag(fp[,i]))%*%e[,,i])/df[i]

        # Save the moments
        tmp[[i]] <- list(Sxy=Sxy, Sxx=Sxx) #, ytmp=ytmp, xtmp=xtmp)
    }

    return(list(Bk=Bk, Sigmak=Sigmak, df=df, e=e, moment=tmp))
}


###########################################################################
# THIS SHOULD BE RENAMED / RE-DEFINED, SINCE EARLIER VERSIONS ARE IN
# THE SVN NOW
###########################################################################
# hregime.reg2
# h  = number of regimes
# m  = number of equations
# p  = number of lags in the VAR
# fp = filter probability matrix, T x h -- where T matches the sample!
# init.model = initial prior / model object produced by szbvar()
###########################################################################

hregime.reg2 <- function(h, m, p, fp, init.model)
{

    # Storage
    tmp <- vector(mode="list", length=h)
    Bk <- array(0, c(m*p+1, m, h))
    Sigmak <- array(0, c(m,m,h))
    df <- colSums(fp)
    e <- array(0, c(nrow(fp)+m+1, m, h))

    # New version -- just pad in to get the dummy obs.  Use the full
    # design matrix and require first m+1 dummy obs are in each
    # regime.
    Y <- init.model$Y; X <- init.model$X  # So the dummy obs. are
                                          # included here, as defined
                                          # by init.model <- szbvar().

    # Pad the filter probabilities so that each regime gets the dummy
    # observations.

    fp <- rbind(matrix(1, m+1, h), fp)

    # Loop to compute
    # 1) Sums of squares for X and Y for each regime
    # 2) B(k) matrices
    # 3) Residuals
    # 4) Sigma(k) matrices

    for(i in 1:h)
    {
        # Note how the dummy obs. are appended to the moment matrices above,
        # so we no longer need to handle these separately as sums
        # inside of each cross-product computation.  See earlier SVN
        # commits to see how this was done (badly) in earlier iterations.

        # Compute the cross products we need..
        # Last one here is so we can avoid ((X'X)^{-1} \ccross Sigma)
        # or the need  to do inverses on potentially sample degenerate
        # values in a regime with too few observations to compute a
        # p.d. version of X'X.

        # Subset the data for each regime
        X1 <- X[fp[,i]==1,]
        Y1 <- Y[fp[,i]==1,]

        Sxy <- crossprod(X1,Y1)
        Sxx <- crossprod(X1)

        ## Sxy <- crossprod(X, diag(fp[,i]))%*%Y
        ## Sxx <- crossprod(X, diag(fp[,i]))%*%X
        ## Syx <- crossprod(Y, diag(fp[,i]))%*%X

        hstar <- Sxx + init.model$H0  # Regression X'X + Prior

        # Compute regressions for a state using QR
        #
        # Note this includes the priors because of the padding in the
        # state-space matrices for fp.

        Bk[,,i] <- qr.coef(qr(hstar), Sxy + init.model$H0[,1:m])

        # Compute residuals and Sigma (based on Krolzig)

        # Get the full residuals -- need these for filtering -- note
        # we do not subset here!
        e[,,i] <- (Y - X%*%Bk[,,i])

        # Now compute the VCOV of the errors.  Note this can be done
        # in several ways for the posterior.  Want to use the
        # regime-specific, sample identified values. This depends on
        # having appropriate df and variation in the error
        # covariance.  Below assumes this is true.  Can do this also
        # with the classic Zellner formula of S0 + Y'Y + H0 - B'(X'X +
        # H0)B, adjusted by df, but this is numerically unstable in
        # some regime classifications.

        # Note this can also handle degenerate regimes (i.e., too few
        # obs.) since the prior will dominate the following
        # computation for the sample variance in a regime and be p.d.

        esub <- as.matrix(e[,,i])
        esub <- esub[fp[,i]==1,]
        Sigmak[,,i] <- (init.model$S0 + init.model$H0[1:m,1:m] +
                        crossprod(esub))/(df[i]+m+1)

        # Save the moments
        tmp[[i]] <- list(Sxy=Sxy, Sxx=Sxx)
    }

    # Only return the observation relevant residuals in "e"
    return(list(Bk=Bk, Sigmak=Sigmak, df=df, e=e[(m+2):nrow(e),,],
                moment=tmp))
}

# Count the number of state transitions for the n(ij) for the
# state-space updates.
count.transitions <- function(s)
  { M <- ncol(s)
    TT <- nrow(s)
    s <- crossprod(t(s), as.matrix(seq(1:M)))
    sw <- matrix(0, M, M)
    for (t in 2:TT)
      { st1 <- s[t-1]
        st <- s[t]
        sw[st1,st] <- sw[st1, st] + 1
      }
    return(sw)
  }