library(BayesValidate, lib.loc = "~/rlibs") library(mvtnorm) validateSSVS <- function(verbose = FALSE, n.CGR.reps=200, threshold = 0.01){ ############################################################################ ## ## validateSSVS ## ## Run the Cook, Gelman, and Rubin (2006) validation of stochastic search ## variable selection (George and McCullough 1993). ## ## Create the inputs to the Cook and Rubin validation # Specify the hyperparameter values. Theoretically any values for these # will lead to a valid test. tau2inv <- 10000 c2inv <- 0.0001 sigma2inv <- 0.01 hyper <- c( tau2inv, c2inv, sigma2inv) blocking <- c( 0,0,0,0,0,0,0,0,0,0 ) # the specifications for the MCMC nChains <- 10 # the inputs for the parameter generation function generate.param.inputs <- list(hyper, blocking, verbose) # the inputs for the parameter generation function generate.data.inputs <- list(sigma2inv, verbose) # the inputs for the function to draw from the posterior analyze.data.inputs <- list(hyper, blocking, nChains, verbose, n.CGR.reps) ## run the validation tst <- validate(generate.param = generate.param.SSVS, generate.param.inputs = generate.param.inputs, generate.data = generate.data.SSVS, generate.data.inputs = generate.data.inputs, analyze.data = analyze.data.SSVS, analyze.data.inputs = analyze.data.inputs, n.rep = n.CGR.reps, quantilePlot.directory = "./") printCGRoutput(tst, threshold=threshold, file="./testResults.txt") return(invisible(tst)) } betaFromPrior <- function(blocking, tau2inv, c2inv){ ############################################################################ ## ## betaFromPrior ## ## Sample beta from its prior distribution ## nPreds <- length(blocking) blocks <- unique(blocking) mixComp <- rep(-1, nPreds) for (j in blocks){ thisBlock <- (blocking == j) mixComp[thisBlock] <- rbinom(n=1, size=1, prob=0.5) } beta <- rep(-1, nPreds) for (i in (1:nPreds)){ sd <- 1/sqrt(tau2inv) if (mixComp[i]) sd <- sd/sqrt(c2inv) beta[i] <- rnorm(n=1, sd = sd) } return(beta) } generate.param.SSVS <- function(inputs){ ######################################################################### ## ## generate.param.SSVS ## ## Generate the SSVS regression parameters for their prior distributions. ## This function is used as input to the Cook and Rubin validation. ## # get the hyperparameters hyper <- inputs[[1]] tau2inv <- hyper[1] c2inv <- hyper[2] blocking <- inputs[[2]] verbose <- inputs[[3]] beta <- betaFromPrior(blocking=blocking, tau2inv=tau2inv, c2inv=c2inv) if (verbose){ # print the parameter values cat("*******************************************************************\n") cat("parameter values:\n") cat("beta: ", beta, "\n") cat("*******************************************************************\n") } return(beta) } generate.data.SSVS <- function(params, inputs){ ######################################################################### ## ## generate.data.SSVS ## ## Given the parameters of the model, generate data from ## the model. ## ## This function is used as input to the Cook and Rubin validation. ## sigma2inv <- inputs[[1]] verbose <- inputs[[2]] beta <- params nDims <- length(beta) y <- rep(0, nDims) for (i in (1:nDims)) y[i] <- rnorm(n = 1, mean=beta[i], sd=1/sqrt(sigma2inv)) if (verbose){ # print the data value cat("*******************************************************************\n") cat("data:\n") cat("y: ", y, "\n") cat("*******************************************************************\n") } return(y) } analyze.data.SSVS <- function(y, params.true, inputs){ ######################################################################### ## ## analyze.data.SSVS ## ## Obtain posterior samples using a Gibbs sampler. ## ## This function is used as input to the Cook and Rubin validation. ## # get the prior hyperparameters hyper <- inputs[[1]] tau2inv <- hyper[1] c2inv <- hyper[2] sigma2inv <- hyper[3] # get the other specifications blocking <- inputs[[2]] nChains <- inputs[[3]] verbose <- inputs[[4]] n.CGR.reps <- inputs[[5]] # sample the initial values from the overdispersed distribution inits.beta <- list(rep(0,nChains)) nDims <- length(blocking) for (chain in (1:nChains)){ # verify that EM consistently converges to the same value, # and set the estimated posterior mean equal to that value. # Also verify that the density of the mixture component 0 # is approximately zero at the mode discovered by EM. post.mean <- verifyPostMean (y=y, tau2inv=tau2inv, c2inv=c2inv, sigma2inv=sigma2inv, nDims=nDims) covarMat <- diag(rep((tau2inv*c2inv)^-1, nDims)) # draw the inits from a multivariate cauchy distribution centered at # post.mean and with the true covariance matrix for the wide mixture # component (1) inits.beta[[chain]] <- as.vector(rmvt(n=1, sigma = covarMat, df=1)) inits.beta[[chain]] <- inits.beta[[chain]] + post.mean } if(F){ # if (verbose){ cat("***************************************************** Initial values =") print(inits.beta) } # get the posterior samples post.mcmc <- sampleSSVS(data = y, blocking = blocking, hyper=hyper, inits.beta=inits.beta) if (verbose) { if (nvar(post.mcmc) > 40){ tmp.post.mcmc <- post.mcmc[,1:40] } else { tmp.post.mcmc <- post.mcmc } print(summary(tmp.post.mcmc)) } # check the convergence of the chain. If not converged, print # trace plots and issue a warning poorConvergeParams <- poorconvergenceParameters(x=post.mcmc, n.CGR.reps=n.CGR.reps) if (any(poorConvergeParams)){ warning("Poor convergence of the some parameters. Traceplot generated.") par(mfrow = c(3,2)) print(traceplot(post.mcmc[,poorConvergeParams])) } return (post.mcmc) } sampleSSVS <- function(data, blocking, hyper, inits.beta){ ######################################################################### ## ## sampleSSVS ## ## Obtain posterior samples from the SSVS model using a Gibbs sampler. ## writeVec(data, "outcome.txt") writeVec(blocking, "blocking.txt") writeVec(hyper, "hyper.txt") library(coda) nChains <- length(inits.beta) samples.mcmc.list <- list(rep(0, nChains)) for (chain in (1:nChains)){ writeVec (inits.beta[[chain]], "initsBeta.txt") system("./sampleSSVS") samples <- read.table ("samples.txt") samples.mcmc.list[[chain]] <- mcmc(samples) } mcmc.list(data = samples.mcmc.list) } poorconvergenceParameters <- function(x, n.CGR.reps){ ######################################################################### ## ## poorconvergenceParameters ## ## check the convergence of the parameters, and return the indices of ## poorly converged parameters if there are any. ## # check the effective sample size ess <- effectiveSize(x) poorConvergence <- (ess < niter(x)/10) # check the geweke Z-scores nChains <- nchain(x) numTests <- nChains * nvar(x) for (i in 1:nChains){ thinnedSamples <- window(x[[i]], thin=10) geweke.diag.thischain <- geweke.diag(thinnedSamples)$z pval <- 2*pnorm(q=abs(geweke.diag.thischain), lower.tail=F) pval.adjusted <- pval * numTests * n.CGR.reps poorConvergence <- poorConvergence || (pval.adjusted < 0.01) } # Check the Gelman-Rubin PSRF. The gelman.diag function returns # separate psrf values for the parameters but these are sometimes # NaN. It also returns a multivariate psrf, which does not have # this problem, so we use it instead. Although this multivariate # psrf is sometimes a complex value, only the real part is # important. if (Re(gelman.diag(x)$mpsrf) > 1.2) poorConvergence <- rep(T, length(ess)) return(which(poorConvergence)) } writeVec <- function(vec, filename){ ######################################################################## ## ## writeVec ## ## write a vector to a file ## write.table(matrix(vec,ncol=1), filename, col.names = F, row.names = F) } verifyPostMean <- function(y, tau2inv, c2inv, sigma2inv, nDims){ ######################################################################## ## ## verifyPostMean ## ## check that EM consistently converges to the same value of beta, and ## return that value ## beta <- list(rep(-1, 5)) for (j in (1:5)){ beta[[j]] <- EMrun(y=y, tau2inv=tau2inv, c2inv=c2inv, sigma2inv=sigma2inv, nDims=nDims) for (i in (1:nDims)) if (!isTRUE(all.equal(beta[[j]][i], beta[[1]][i], tolerance=10e-6))) stop("The modes found by EM are not all equal") } beta[[1]] } EMrun <- function(y, tau2inv, c2inv, sigma2inv, nDims){ ######################################################################## ## ## EMrun ## ## Run EM, starting with a draw from a simplified posterior for beta. ## After a fixed number of iterations, return the resulting value. ## # start EM with a draw from a simplified posterior for beta beta <- rep(0, nDims) betaMean <- sigma2inv*y/(sigma2inv+c2inv*tau2inv) for (j in (1:nDims)) beta[j] <- rnorm(n=1, mean=betaMean[j], sd = 1/sqrt(sigma2inv + c2inv*tau2inv)) for (i in (1:10)){ # calculate [alpha_1|beta, y] logA <- dmvnorm(x=beta, sigma=diag(rep(1/(c2inv*tau2inv),nDims)), log=T) logB <- dmvnorm(x=beta, sigma=diag(rep(1/tau2inv, nDims)), log=T) logDiff <- logA - logB probRatio <- exp(logDiff) if (probRatio == Inf) prob <- 1 else prob <- probRatio / (1+probRatio) if (prob < 0.95){ warning("Probability of mixture component 0 is nontrivial") cat("prob1 = ", prob, "\n") cat("beta = ", beta, "\n") } # prob is Pr(alpha_1 = 1|beta, sigma2inv, y) # calculate the expected value of a_j^-2 as defined # in George and McCulloch (expectation step of EM) a2invBar <- prob * c2inv + (1-prob) # do the maximization step in EM to get the new value of # beta. beta <- sigma2inv*y/(sigma2inv+a2invBar*tau2inv) } return(beta) }