library(BayesValidate, lib.loc = "~/rlibs")
library(mvtnorm)

validateSSVS <- function(verbose = FALSE, n.CGR.reps=200,
                              threshold = 0.01){

############################################################################
##
##  validateSSVS
##
##  Run the Cook, Gelman, and Rubin (2006) validation of stochastic search
##  variable selection (George and McCullough 1993).
##


  ## Create the inputs to the Cook and Rubin validation
  
  # Specify the hyperparameter values.  Theoretically any values for these 
  # will lead to a valid test.  

  tau2inv <- 10000
  c2inv <- 0.0001
  sigma2inv <- 0.01
  
  hyper <- c( tau2inv, c2inv, sigma2inv)   
  
  blocking <- c( 0,0,0,0,0,0,0,0,0,0 )
  
  # the specifications for the MCMC
  nChains <- 10

  # the inputs for the parameter generation function
  generate.param.inputs <- list(hyper, blocking, verbose)
    
  # the inputs for the parameter generation function
  generate.data.inputs <- list(sigma2inv, verbose)

  # the inputs for the function to draw from the posterior 
  analyze.data.inputs <- list(hyper, blocking, nChains, verbose,
                              n.CGR.reps)

  ## run the validation
  tst <- validate(generate.param = generate.param.SSVS, 
    generate.param.inputs = generate.param.inputs, 
    generate.data = generate.data.SSVS,
    generate.data.inputs = generate.data.inputs,
    analyze.data = analyze.data.SSVS, 
    analyze.data.inputs = analyze.data.inputs,
    n.rep = n.CGR.reps, quantilePlot.directory = "./")

  printCGRoutput(tst, threshold=threshold, file="./testResults.txt")
  return(invisible(tst))
  
}


betaFromPrior <- function(blocking, tau2inv, c2inv){
############################################################################
##
##  betaFromPrior
##
##  Sample beta from its prior distribution
##

  nPreds <- length(blocking)
  blocks <- unique(blocking)
  mixComp <- rep(-1, nPreds)
  for (j in blocks){
    thisBlock <- (blocking == j)
    mixComp[thisBlock] <- rbinom(n=1, size=1, prob=0.5)
  }
  
  beta <- rep(-1, nPreds)
  for (i in (1:nPreds)){
    sd <- 1/sqrt(tau2inv)
    if (mixComp[i])
      sd <- sd/sqrt(c2inv)
    beta[i] <- rnorm(n=1, sd = sd)
  }

  return(beta)
}


generate.param.SSVS <- function(inputs){
#########################################################################
##  
##  generate.param.SSVS
##
##  Generate the SSVS regression parameters for their prior distributions.
##  This function is used as input to the Cook and Rubin validation.
##

  # get the hyperparameters
  hyper <- inputs[[1]]
  tau2inv <- hyper[1]
  c2inv <- hyper[2]
  
  blocking <- inputs[[2]]
  verbose <- inputs[[3]]
  
  beta <- betaFromPrior(blocking=blocking, tau2inv=tau2inv, c2inv=c2inv)
  
  if (verbose){
    # print the parameter values
    cat("*******************************************************************\n")
    cat("parameter values:\n")
    cat("beta: ", beta, "\n")
    cat("*******************************************************************\n")
  }
  
  return(beta)
}


generate.data.SSVS <- function(params, inputs){
#########################################################################
##  
##  generate.data.SSVS
##
##  Given the parameters of the model, generate data from
##  the model.  
##  
##  This function is used as input to the Cook and Rubin validation.
##

  sigma2inv <- inputs[[1]]
  verbose <- inputs[[2]]
  
  beta <- params
  nDims <- length(beta)
  y <- rep(0, nDims)
  for (i in (1:nDims))
    y[i] <- rnorm(n = 1, mean=beta[i], sd=1/sqrt(sigma2inv))

  if (verbose){
    # print the data value
    cat("*******************************************************************\n")
    cat("data:\n")
    cat("y: ", y, "\n")
    cat("*******************************************************************\n")
  }
  return(y)
}

analyze.data.SSVS <- function(y, params.true, inputs){
#########################################################################
##  
##  analyze.data.SSVS
##
##  Obtain posterior samples using a Gibbs sampler.
##  
##  This function is used as input to the Cook and Rubin validation.
##
    
  # get the prior hyperparameters
  hyper <- inputs[[1]]
  tau2inv <- hyper[1]
  c2inv <- hyper[2]
  sigma2inv <- hyper[3]
  
  # get the other specifications
  blocking <-                         inputs[[2]]
  nChains <-                           inputs[[3]]
  verbose <-                          inputs[[4]]
  n.CGR.reps <-                          inputs[[5]] 

  # sample the initial values from the overdispersed distribution
  inits.beta <- list(rep(0,nChains))
  nDims <- length(blocking)
  for (chain in (1:nChains)){
    # verify that EM consistently converges to the same value,
    # and set the estimated posterior mean equal to that value.
    # Also verify that the density of the mixture component 0
    # is approximately zero at the mode discovered by EM.
    post.mean <- verifyPostMean (y=y, tau2inv=tau2inv, c2inv=c2inv,
                                 sigma2inv=sigma2inv, nDims=nDims)
    covarMat <- diag(rep((tau2inv*c2inv)^-1, nDims))
    # draw the inits from a multivariate cauchy distribution centered at
    # post.mean and with the true covariance matrix for the wide mixture
    # component (1)
    inits.beta[[chain]] <-
      as.vector(rmvt(n=1, sigma = covarMat, df=1))
    inits.beta[[chain]] <- inits.beta[[chain]] + post.mean
  }
  if(F){                                     #  if (verbose){
    cat("*****************************************************

Initial values =")
    print(inits.beta)
  }

  # get the posterior samples
  post.mcmc <-
    sampleSSVS(data = y, blocking = blocking,
               hyper=hyper, inits.beta=inits.beta)

  if (verbose)	{
    if (nvar(post.mcmc) > 40){
      tmp.post.mcmc <- post.mcmc[,1:40]
    } else {
      tmp.post.mcmc <- post.mcmc
    }
    print(summary(tmp.post.mcmc))
  }
  # check the convergence of the chain.  If not converged, print
  # trace plots and issue a warning
  poorConvergeParams <- poorconvergenceParameters(x=post.mcmc,
                                                  n.CGR.reps=n.CGR.reps)
  if (any(poorConvergeParams)){
    warning("Poor convergence of the some parameters.  Traceplot generated.")
    par(mfrow = c(3,2))
    print(traceplot(post.mcmc[,poorConvergeParams]))
  }
					
  return (post.mcmc)
}

sampleSSVS <- function(data, blocking, hyper, inits.beta){
#########################################################################
##  
##  sampleSSVS
##
##  Obtain posterior samples from the SSVS model using a Gibbs sampler.
##  

  writeVec(data, "outcome.txt")
  writeVec(blocking, "blocking.txt")
  writeVec(hyper, "hyper.txt")

  library(coda)

  nChains <- length(inits.beta)
  samples.mcmc.list <- list(rep(0, nChains))
  for (chain in (1:nChains)){
    writeVec (inits.beta[[chain]], "initsBeta.txt")

    system("./sampleSSVS")
    samples <- read.table ("samples.txt")
    samples.mcmc.list[[chain]] <- mcmc(samples)
  }

  mcmc.list(data = samples.mcmc.list)
}

poorconvergenceParameters <- function(x, n.CGR.reps){
#########################################################################
##  
##  poorconvergenceParameters
##
##  check the convergence of the parameters, and return the indices of
##  poorly converged parameters if there are any.
##
  
  # check the effective sample size
  ess <- effectiveSize(x)
  poorConvergence <- (ess < niter(x)/10)

  # check the geweke Z-scores
  nChains <- nchain(x)
  numTests <- nChains * nvar(x)
  for (i in 1:nChains){
    thinnedSamples <- window(x[[i]], thin=10)
    geweke.diag.thischain <- geweke.diag(thinnedSamples)$z
    pval <- 2*pnorm(q=abs(geweke.diag.thischain), lower.tail=F)
    pval.adjusted <- pval * numTests * n.CGR.reps
    poorConvergence <- poorConvergence || (pval.adjusted < 0.01)
  }

  # Check the Gelman-Rubin PSRF. The gelman.diag function returns
  # separate psrf values for the parameters but these are sometimes
  # NaN.  It also returns a multivariate psrf, which does not have
  # this problem, so we use it instead.  Although this multivariate
  # psrf is sometimes a complex value, only the real part is
  # important.
  if (Re(gelman.diag(x)$mpsrf) > 1.2)
    poorConvergence <- rep(T, length(ess))
  
  return(which(poorConvergence))
}

writeVec <- function(vec, filename){
########################################################################
##
## writeVec
##
## write a vector to a file
##
  
  write.table(matrix(vec,ncol=1), filename, col.names = F, row.names = F)
}


verifyPostMean <- function(y, tau2inv, c2inv, sigma2inv, nDims){
########################################################################
##
## verifyPostMean
##
## check that EM consistently converges to the same value of beta, and
## return that value
##

  beta <- list(rep(-1, 5))
  for (j in (1:5)){
    beta[[j]] <- EMrun(y=y, tau2inv=tau2inv, c2inv=c2inv,
                       sigma2inv=sigma2inv, nDims=nDims)
    for (i in (1:nDims))
      if (!isTRUE(all.equal(beta[[j]][i], beta[[1]][i], tolerance=10e-6)))
        stop("The modes found by EM are not all equal")
  }
  beta[[1]]
}


EMrun <- function(y, tau2inv, c2inv, sigma2inv, nDims){
########################################################################
##
## EMrun
##
## Run EM, starting with a draw from a simplified posterior for beta.
## After a fixed number of iterations, return the resulting value.
##

  # start EM with a draw from a simplified posterior for beta
  beta <- rep(0, nDims)
  betaMean <- sigma2inv*y/(sigma2inv+c2inv*tau2inv)
  for (j in (1:nDims))
    beta[j] <- rnorm(n=1, mean=betaMean[j],
                     sd = 1/sqrt(sigma2inv + c2inv*tau2inv))

  for (i in (1:10)){
    # calculate [alpha_1|beta, y]
    logA <- dmvnorm(x=beta, sigma=diag(rep(1/(c2inv*tau2inv),nDims)), log=T)
    logB <- dmvnorm(x=beta, sigma=diag(rep(1/tau2inv, nDims)), log=T)
    logDiff <- logA - logB
    probRatio <- exp(logDiff)
    if (probRatio == Inf)
      prob <- 1
    else
      prob <- probRatio / (1+probRatio)
    if (prob < 0.95){
      warning("Probability of mixture component 0 is nontrivial")
      cat("prob1 = ", prob, "\n")
      cat("beta = ", beta, "\n")
      
    }
    # prob is Pr(alpha_1 = 1|beta, sigma2inv, y)
    # calculate the expected value of a_j^-2 as defined
    # in George and McCulloch (expectation step of EM)
    a2invBar <- prob * c2inv + (1-prob)
    # do the maximization step in EM to get the new value of
    # beta.
    beta <- sigma2inv*y/(sigma2inv+a2invBar*tau2inv)
      
  }

  return(beta)
}