library(BayesValidate, lib.loc="~/rlibs") library(mvtnorm) # Contains the function rmvt for sampling from # multivariate t distributions, and dmvnorm for # the density of multivariate normal distributions ############################################################################ ## ## validateNormalMix ## ## Run the Cook, Gelman, and Rubin (2006) validation of a normal mixture ## model. ## validateNormalMix <- function(verbose = FALSE, n.CGR.reps=200, threshold = 0.01){ ## Create the inputs to the Cook and Rubin validation # Specify the hyperparameter values. Theoretically any values for these # will lead to a valid test. prior.mean <- 0 prior.prec <- 1 hyper <- c( prior.mean, prior.prec) nDims <- 8 # the specifications for the MCMC nChains <- 10 # the inputs for the parameter generation function generate.param.inputs <- c(hyper, nDims) # the inputs for the data generation function generate.data.inputs <- c(hyper, nDims, verbose) # the inputs for the function to draw from the posterior analyze.data.inputs <- list(hyper, nChains, nDims, verbose, n.CGR.reps) ## run the validation #set.seed(11) tst <- validate(generate.param = generate.param.normalMix, generate.param.inputs = generate.param.inputs, generate.data = generate.data.normalMix, generate.data.inputs = generate.data.inputs, analyze.data = analyze.data.normalMix, analyze.data.inputs = analyze.data.inputs, n.rep = n.CGR.reps, quantilePlot.directory = "./") printCGRoutput(tst, "./testResults.txt") return(invisible(tst)) } ######################################################################### ## ## generate.param.normalMix ## ## Generate the parameters for the normal mixture model from their ## prior distributions. ## This function is used as input to the Cook and Rubin validation. ## generate.param.normalMix <- function(inputs){ # get the hyperparameters prior.mean <- inputs[1] prior.prec <- inputs[2] nDims <- inputs[3] # sample the parameter theta <- rnorm(n=nDims, mean=prior.mean, sd=(1/sqrt(prior.prec))) return(theta) } ######################################################################### ## ## generate.data.normalMix ## ## Given the parameters of the normal mixture model, generate data from ## the model. ## ## This function is used as input to the Cook and Rubin validation. ## generate.data.normalMix <- function(params, inputs){ # get the prior mean and precision mean <- inputs[1] prec <- inputs[2] # get the number of dimensions nDims <- inputs[3] # get an indicator of whether to print information verbose <- inputs[4] # get the parameters theta <- params # sample the outcome x <- rbinom(n=1, size=1, prob=0.5) if (x == 1){ y <- rnorm(n=nDims, mean=theta, sd=0.01) } else { y <- rnorm(n=nDims, mean=theta, sd = (1/sqrt(prec))) } if (verbose){ # print the parameter values print("*******************************************************************") print("parameter values:") print("") print("theta") print(theta) # print the data print("*******************************************************************") print("data:") print(y) print("") } return(y) } ######################################################################### ## ## analyze.data.normalMix ## ## Obtain posterior samples using a Gibbs sampler. ## ## This function is used as input to the Cook and Rubin validation. ## analyze.data.normalMix <- function(y, params.true, inputs){ # get the prior hyperparameters hyper <- inputs[[1]] prior.mean <- hyper[1] prior.prec <- hyper[2] # get the other specifications nChains <- inputs[[2]] nDims <- inputs[[3]] verbose <- inputs[[4]] n.CGR.reps <- inputs[[5]] # sample the initial values from the overdispersed distribution initial.values <- list(rep(0,nChains)) for (chain in (1:nChains)){ post.mean <- y *0.5 post.prec <- 2*prior.prec covarMat <- diag(rep(post.prec^-1, nDims)) # verify that EM consistently converges to the value y/2 verifyPostMean (y, prior.mean, prior.prec) # draw the inits from a cauchy distribution centered at # y/2 and with the true covariance matrix for that mixture # component initial.values[[chain]] <- rmvt(n=1, sigma = covarMat, df=1) initial.values[[chain]] <- initial.values[[chain]] + post.mean } if (verbose){ cat("***************************************************** Initial values =") print(initial.values) } # get the posterior samples posterior.samples <- sampleNormalMix (data = y, inits = initial.values, nChains = nChains, hyper = hyper) # if (verbose) { # print a summary # print(summary(posterior.samples)) # } # check the convergence of the chain. If not converged, print trace plots # and issue a warning. # poorlyConvergedParams <- poorconvergenceParameters (x = posterior.samples, # n.CGR.reps=n.CGR.reps) # if (length(poorlyConvergedParams > 0)){ # warning(paste("Poor convergence of the following parameters: ", # poorlyConvergedParams, ". Traceplot generated.\n", # sep = "")) # traceplot(posterior.samples[,poorlyConvergedParams]) # } plotConvDiag(posterior.samples) # Return the posterior samples from the first chain return (mcmc.list(posterior.samples)[[1]]) } poorconvergenceParameters <- function(x, n.CGR.reps){ # check the effective sample size ess <- effectiveSize(x) poorConvergence <- (ess < niter(x)/10) # check the geweke Z-scores nChains <- nchain(x) numTests <- nChains * nvar(x) for (i in 1:nChains){ geweke.diag.thischain <- geweke.diag(x)[[i]]$z pval <- 2*pnorm(q=abs(geweke.diag.thischain), lower.tail=F) pval.adjusted <- pval * numTests * n.CGR.reps poorConvergence <- poorConvergence || (pval.adjusted < 0.01) } # Check the Gelman-Rubin PSRF. The gelman.diag function returns # separate psrf values for the parameters but these are sometimes # NaN. It also returns a multivariate psrf, which does not have # this problem, so we use it instead. Although this multivariate # psrf is sometimes a complex value, only the real part is # important. if (Re(gelman.diag(x)$mpsrf) > 1.2) poorConvergence <- rep(T, length(ess)) return(which(poorConvergence)) } sampleNormalMix <- function(data, inits, nChains, hyper){ library(coda) writeVec (data, "outcome.txt") writeVec (hyper, "hyper.txt") samples.mcmc.list <- list(rep(0, nChains)) for (chain in (1:nChains)){ writeVec (inits[[chain]], "inits.txt") system("./normalMixSample") samples <- read.table ("samples.txt") samples.mcmc.list[[chain]] <- mcmc(samples) } mcmc.list(data = samples.mcmc.list) } writeVec <- function(vec, filename){ write.table(matrix(vec, ncol=1), filename, col.names = F, row.names = F) } # check that EM consistently converges to the value y/2 verifyPostMean <- function(y, prior.mean, prior.prec){ for (j in (1:20)){ theta <- EMrun(y, prior.mean, prior.prec) if (any(theta != y/2)) stop("The mode found by EM is not equal to y/2") } } EMrun <- function(y, prior.mean, prior.prec){ #start EM with a draw from the prior theta <- rnorm(n=length(y), mean=prior.mean, sd=1/sqrt(prior.prec)) for (i in (1:20)){ # calculate the covariance matrices to calculate [Z_1|theta_1,y] Sigma1 <- diag(rep(prior.prec^-1, length(y))) Sigma0 <- diag(rep(0.0001, length(y))) # calculate [Z_1|theta_1,y] logA <- dmvnorm(x=y, mean=theta, sigma=Sigma1, log = T) logB <- dmvnorm(x=y, mean=theta, sigma=Sigma0, log = T) logDiff <- logA - logB probRatio <- exp(logDiff) if (probRatio == Inf) prob <- 1 else if (probRatio == -Inf) prob <- 0 else prob <- probRatio / (1+probRatio) # prob is Pr(Z_1 = 1|theta_1, y) # calculate the theta that maximizes Q(theta|theta^(t)) newTheta <- prob*prior.prec + (1-prob)*10000 newTheta <- newTheta / (prob*(2*prior.prec) + (1-prob)*(10000+prior.prec)) theta <- newTheta * y } } plotConvDiag <- function(x){ postscript("mixModelConvDiag.ps") firstParam <- x[,1] par(mfrow=c(2,2)) par(cex.axis=2) par(cex.lab=2) par(mar=(c(5,5,4,2)+0.1)) thinned.samples <- window(x=firstParam, thin=10) traceplot(thinned.samples[[1]], ylab="Value") gewe <- geweke.plot(thinned.samples[[1]], auto.layout=F) points(x=gewe[[1]], y=gewe[[2]][,1,1], cex=2) autocorr.plot(thinned.samples[[1]], auto.layout=F) gelm <- gelman.plot(thinned.samples, auto.layout=F, ylab="PSRF", xlab="Last iteration in segment", lwd=2, col=c(1,1)) ymax <- max(c(1, gelm$shrink[, 1, ]), na.rm = TRUE) xmax <- max(gelm$last.iter) legend(xmax, ymax, legend = c("median", "97.5%"), lty = 1:2, bty = "o", col = c(1,1), xjust = 1, yjust = 1, cex=2, bg="white") dev.off() } geweke.plot <- function (x, frac1 = 0.1, frac2 = 0.5, nbins = 20, pvalue = 0.05, auto.layout = TRUE, ask = dev.interactive(), ...) { x <- mcmc.list(x) oldpar <- NULL on.exit(par(oldpar)) if (auto.layout) oldpar <- par(mfrow = set.mfrow(Nchains = nchain(x), Nparms = nvar(x))) ystart <- seq(from = start(x), to = (start(x) + end(x))/2, length = nbins) gcd <- array(dim = c(length(ystart), nvar(x), nchain(x)), dimnames = c(ystart, varnames(x), chanames(x))) for (n in 1:length(ystart)) { geweke.out <- geweke.diag(window(x, start = ystart[n]), frac1 = frac1, frac2 = frac2) for (k in 1:nchain(x)) gcd[n, , k] <- geweke.out[[k]]$z } climit <- qnorm(1 - pvalue/2) for (k in 1:nchain(x)) for (j in 1:nvar(x)) { ylimit <- max(c(climit, abs(gcd[, j, k]))) plot(ystart, gcd[, j, k], type = "p", xlab = "First iteration in segment", ylab = "Z-score", pch = 4, ylim = c(-ylimit, ylimit), ...) abline(h = c(climit, -climit), lty = 2) if (nchain(x) > 1) { title(main = paste(varnames(x, allow.null = FALSE)[j], " (", chanames(x, allow.null = FALSE)[k], ")", sep = "")) } else { title(main = paste(varnames(x)[j], sep = "")) } if (k == 1 && j == 1) oldpar <- c(oldpar, par(ask = ask)) } invisible(list(start.iter = ystart, z = gcd)) }