library(BayesValidate, lib.loc="~/rlibs")
library(mvtnorm)   # Contains the function rmvt for sampling from
                   # multivariate t distributions, and dmvnorm for
                   # the density of multivariate normal distributions

############################################################################
##
##  validateNormalMix
##
##  Run the Cook, Gelman, and Rubin (2006) validation of a normal mixture
##  model.  
##

validateNormalMix <- function(verbose = FALSE, n.CGR.reps=200,
                              threshold = 0.01){

  ## Create the inputs to the Cook and Rubin validation
  
  # Specify the hyperparameter values.  Theoretically any values for these 
  # will lead to a valid test.  

  prior.mean <-                     0
  prior.prec <-                     1
  
  hyper <- c(  prior.mean, prior.prec)
  
  nDims <- 8
  
  # the specifications for the MCMC
  nChains <- 10

  # the inputs for the parameter generation function
  generate.param.inputs <- c(hyper, nDims)
  
  # the inputs for the data generation function
  generate.data.inputs <- c(hyper, nDims, verbose)
  
  # the inputs for the function to draw from the posterior 
  analyze.data.inputs <- list(hyper, nChains, nDims, verbose,
                              n.CGR.reps)

  ## run the validation
  #set.seed(11)
  tst <- validate(generate.param = generate.param.normalMix, 
    generate.param.inputs = generate.param.inputs, 
    generate.data = generate.data.normalMix,
    generate.data.inputs = generate.data.inputs, 
    analyze.data = analyze.data.normalMix, 
    analyze.data.inputs = analyze.data.inputs,
    n.rep = n.CGR.reps, quantilePlot.directory = "./")

  printCGRoutput(tst, "./testResults.txt")
  return(invisible(tst))
  
}



#########################################################################
##  
##  generate.param.normalMix
##
##  Generate the parameters for the normal mixture model from their
##  prior distributions.
##  This function is used as input to the Cook and Rubin validation.
##

generate.param.normalMix <- function(inputs){

  # get the hyperparameters
  prior.mean <-                       inputs[1]
  prior.prec <-                       inputs[2]
  nDims <-                            inputs[3]
    
  # sample the parameter
  theta <- rnorm(n=nDims, mean=prior.mean, sd=(1/sqrt(prior.prec)))
  return(theta)
}


#########################################################################
##  
##  generate.data.normalMix
##
##  Given the parameters of the normal mixture model, generate data from
##  the model.
##  
##  This function is used as input to the Cook and Rubin validation.
##

generate.data.normalMix <- function(params, inputs){
  
  # get the prior mean and precision
  mean <-                               inputs[1]
  prec <-                               inputs[2]
  # get the number of dimensions
  nDims <-                              inputs[3]
  # get an indicator of whether to print information
  verbose <-                            inputs[4]
  # get the parameters
  theta <-                              params
    
  # sample the outcome
  x <- rbinom(n=1, size=1, prob=0.5)
  if (x == 1){
    y <- rnorm(n=nDims, mean=theta, sd=0.01)
  } else {
    y <- rnorm(n=nDims, mean=theta, sd = (1/sqrt(prec)))
  }
  
  if (verbose){
    # print the parameter values
    print("*******************************************************************")
    print("parameter values:")
    print("")
    print("theta")
    print(theta)
    
    # print the data
    print("*******************************************************************")
    print("data:")
    print(y)
    print("")
  }
  
  return(y)
}


#########################################################################
##  
##  analyze.data.normalMix
##
##  Obtain posterior samples using a Gibbs sampler.
##  
##  This function is used as input to the Cook and Rubin validation.
##

analyze.data.normalMix <- function(y, params.true, inputs){
    
  # get the prior hyperparameters
  hyper <-                            inputs[[1]]
  prior.mean <-                       hyper[1]
  prior.prec <-                       hyper[2]
  
  # get the other specifications
  nChains <-                           inputs[[2]]
  nDims <-                             inputs[[3]] 
  verbose <-                           inputs[[4]]
  n.CGR.reps <-                        inputs[[5]]
    
  # sample the initial values from the overdispersed distribution
  initial.values <- list(rep(0,nChains))
  for (chain in (1:nChains)){
    post.mean <- y *0.5
    post.prec <- 2*prior.prec
    covarMat <- diag(rep(post.prec^-1, nDims))
    # verify that EM consistently converges to the value y/2
    verifyPostMean (y, prior.mean, prior.prec)
    # draw the inits from a cauchy distribution centered at
    # y/2 and with the true covariance matrix for that mixture
    # component
    initial.values[[chain]] <-
      rmvt(n=1, sigma = covarMat, df=1)
    initial.values[[chain]] <- initial.values[[chain]] + post.mean
  }
  if (verbose){
    cat("*****************************************************

Initial values =")
    print(initial.values)
  }
  
  # get the posterior samples
  posterior.samples <- sampleNormalMix (data = y, inits = initial.values,
                                nChains = nChains, hyper = hyper)

#  if (verbose)  {
    # print a summary
#    print(summary(posterior.samples))
#  }

  # check the convergence of the chain.  If not converged, print trace plots
  # and issue a warning.
#  poorlyConvergedParams <- poorconvergenceParameters (x = posterior.samples,
#                                                      n.CGR.reps=n.CGR.reps)
#  if (length(poorlyConvergedParams > 0)){
#    warning(paste("Poor convergence of the following parameters: ",
#                  poorlyConvergedParams, ".  Traceplot generated.\n",
#                  sep = ""))
#    traceplot(posterior.samples[,poorlyConvergedParams])
#  }
  plotConvDiag(posterior.samples)
  # Return the posterior samples from the first chain
  return (mcmc.list(posterior.samples)[[1]])
}

poorconvergenceParameters <- function(x, n.CGR.reps){

  # check the effective sample size
  ess <- effectiveSize(x)
  poorConvergence <- (ess < niter(x)/10)

  # check the geweke Z-scores
  nChains <- nchain(x)
  numTests <- nChains * nvar(x)
  for (i in 1:nChains){
    geweke.diag.thischain <- geweke.diag(x)[[i]]$z
    pval <- 2*pnorm(q=abs(geweke.diag.thischain), lower.tail=F)
    pval.adjusted <- pval * numTests * n.CGR.reps
    poorConvergence <- poorConvergence || (pval.adjusted < 0.01)
  }

  # Check the Gelman-Rubin PSRF. The gelman.diag function returns
  # separate psrf values for the parameters but these are sometimes
  # NaN.  It also returns a multivariate psrf, which does not have
  # this problem, so we use it instead.  Although this multivariate
  # psrf is sometimes a complex value, only the real part is
  # important.
  if (Re(gelman.diag(x)$mpsrf) > 1.2)
    poorConvergence <- rep(T, length(ess))
  
  return(which(poorConvergence))
}

sampleNormalMix <- function(data, inits, nChains, hyper){
  library(coda)
  
  writeVec (data, "outcome.txt")
  writeVec (hyper, "hyper.txt")
  samples.mcmc.list <- list(rep(0, nChains))
  for (chain in (1:nChains)){
    writeVec (inits[[chain]], "inits.txt")

    system("./normalMixSample")
    samples <- read.table ("samples.txt")
    samples.mcmc.list[[chain]] <- mcmc(samples)
  }

  mcmc.list(data = samples.mcmc.list)
}

writeVec <- function(vec, filename){
  write.table(matrix(vec, ncol=1), filename, col.names = F, row.names = F)
}

# check that EM consistently converges to the value y/2
verifyPostMean <- function(y, prior.mean, prior.prec){

  for (j in (1:20)){
    theta <- EMrun(y, prior.mean, prior.prec)
    if (any(theta != y/2))
      stop("The mode found by EM is not equal to y/2")
  }
}


EMrun <- function(y, prior.mean, prior.prec){
  #start EM with a draw from the prior
  theta <- rnorm(n=length(y), mean=prior.mean, sd=1/sqrt(prior.prec))

  for (i in (1:20)){
    # calculate the covariance matrices to calculate [Z_1|theta_1,y]
    Sigma1 <- diag(rep(prior.prec^-1, length(y)))
    Sigma0 <- diag(rep(0.0001, length(y)))

    # calculate [Z_1|theta_1,y]
    logA <- dmvnorm(x=y, mean=theta, sigma=Sigma1, log = T)
    logB <- dmvnorm(x=y, mean=theta, sigma=Sigma0, log = T)
    logDiff <- logA - logB
    probRatio <- exp(logDiff)
    if (probRatio == Inf)
      prob <- 1
    else if (probRatio == -Inf)
      prob <- 0
    else
      prob <- probRatio / (1+probRatio)
    # prob is Pr(Z_1 = 1|theta_1, y)
    # calculate the theta that maximizes Q(theta|theta^(t))
    newTheta <- prob*prior.prec + (1-prob)*10000
    newTheta <- newTheta / (prob*(2*prior.prec) + (1-prob)*(10000+prior.prec))
    theta <- newTheta * y
    
  }
}  


plotConvDiag <- function(x){
  postscript("mixModelConvDiag.ps")
  firstParam <- x[,1]
  par(mfrow=c(2,2))
  par(cex.axis=2)
  par(cex.lab=2)
  par(mar=(c(5,5,4,2)+0.1))
  thinned.samples <- window(x=firstParam, thin=10)
  traceplot(thinned.samples[[1]], ylab="Value")
  gewe <- geweke.plot(thinned.samples[[1]], auto.layout=F)
  points(x=gewe[[1]], y=gewe[[2]][,1,1], cex=2)
  autocorr.plot(thinned.samples[[1]], auto.layout=F)
  gelm <- gelman.plot(thinned.samples, auto.layout=F, ylab="PSRF", xlab="Last iteration in segment",
                      lwd=2, col=c(1,1))
  ymax <- max(c(1, gelm$shrink[, 1, ]), na.rm = TRUE)
  xmax <- max(gelm$last.iter)  
  legend(xmax, ymax, legend = c("median", "97.5%"), lty = 1:2, bty = "o", 
                col = c(1,1), xjust = 1, yjust = 1, cex=2, bg="white")
  dev.off()
}

geweke.plot <- 
function (x, frac1 = 0.1, frac2 = 0.5, nbins = 20, pvalue = 0.05, 
    auto.layout = TRUE, ask = dev.interactive(), ...) 
{
    x <- mcmc.list(x)
    oldpar <- NULL
    on.exit(par(oldpar))
    if (auto.layout) 
        oldpar <- par(mfrow = set.mfrow(Nchains = nchain(x), 
            Nparms = nvar(x)))
    ystart <- seq(from = start(x), to = (start(x) + end(x))/2, 
        length = nbins)
    gcd <- array(dim = c(length(ystart), nvar(x), nchain(x)), 
        dimnames = c(ystart, varnames(x), chanames(x)))
    for (n in 1:length(ystart)) {
        geweke.out <- geweke.diag(window(x, start = ystart[n]), 
            frac1 = frac1, frac2 = frac2)
        for (k in 1:nchain(x)) gcd[n, , k] <- geweke.out[[k]]$z
    }
    climit <- qnorm(1 - pvalue/2)
    for (k in 1:nchain(x)) for (j in 1:nvar(x)) {
        ylimit <- max(c(climit, abs(gcd[, j, k])))
        plot(ystart, gcd[, j, k], type = "p", xlab = "First iteration in segment", 
            ylab = "Z-score", pch = 4, ylim = c(-ylimit, ylimit), 
            ...)
        abline(h = c(climit, -climit), lty = 2)
        if (nchain(x) > 1) {
            title(main = paste(varnames(x, allow.null = FALSE)[j], 
                " (", chanames(x, allow.null = FALSE)[k], ")", 
                sep = ""))
        }
        else {
            title(main = paste(varnames(x)[j], 
                sep = ""))
        }
        if (k == 1 && j == 1) 
            oldpar <- c(oldpar, par(ask = ask))
    }
    invisible(list(start.iter = ystart, z = gcd))
}