########################################### # Example R scripts # # These scripts pertain to the processing presented in the User's Guide. # So, the scripts must be edited for your situation. # ########################################### # Load some required libraries. library(GcClust) library(shiny) library(shinystan) library(sp) library(maps) # Set the working directory to Process1. setwd("~/Process1") # Save the geochemical data to a binary file, and load. gcData <- CoGeochemData save(gcData, file="gcData.dat") load("gcData.dat") # Plot the state of Colorado as a gray polygon, and add axes. map('state', regions = 'colorado', fill = TRUE, col = "gray60", border = "white") map.axes() # Add the sample locations. plot( gcData$concData, add = TRUE, pch = 16, cex = 1/3, col = "black") cat(sprintf("The number of field samples = %d\n", nrow(gcData$concData@data))) # Pre-process the geochemical data, first with the isometric log-ratio # transform and then with the robust principal component transform. transData <- transformGcData(gcData) save(transData, file = "TransData.dat") # Plot boxplots and violinplots of the principal components (may take a few seconds). plotEdaDist(transData) # Plot the correlation matrix and histogram of correlations (may take a few seconds). plotEdaCorr(transData) # Check the variances of the principal components using a scree plot. plotEdaVar(transData) # Set the number of principal components to use from the scree plot. nPCs <- 22 # Load the compiled mixture model into the R session. tmp <- normalizePath(path.package("GcClust")) load(paste(tmp, "\\stan\\MixtureModel.bin", sep="")) # Monte Carlo sampling of the posterior probability density function (pdf) # # In this example 7 of 8 available cores are used. # # NOTE: This step may require several hours, depending on the number of # CPU cores, CPU processing speed, and the amount of data. # priorParams <- c(4, 3, 3, 2) samplePars <- sampleFmm(transData, nPCs, sm, priorParams, nChainsPerCore = 5, nCores = 7) save(samplePars, file = "SamplePars.dat") # Check for within-chain label-switching. plotSelectedTraces(samplePars) # Select the chains and the switching. # NOTE: This may take a few minutes! plotPointStats(samplePars) # You need to create a common-separated value file # to note which chains are switched. In a file named "SelectedChains.csv" # enter the following lines (not the "#" of course) and set # "isSwitched" to "T" for chains that must be switched or to "F" # for chains that must not be switched. # Chain, isSwitched # 1, F # 2, F # 3, F # 4, F # Save the file to the current working directory. # Read the common-separated value file, switch and combine the chains. selectedChains <- read.csv("SelectedChains.csv", header = TRUE, stringsAsFactors = FALSE) combinedChains <- combineChains(samplePars, selectedChains) save(combinedChains, file = "CombinedChains.dat") # Evaluate the convergence of the Monte Carlo sampling using "shinystan" # to view various graphical and statistical summaries that indicate # how well the sampling converged. launch_shinystan(combinedChains) # Calculate the conditional probability that a field sample is associated # with probability density function (pdf) 1 in the finite mixture model. condProbs1 <- calcCondProbs1(transData, nPCs, combinedChains) save(condProbs1, file = "CondProbs1.dat") # Compare observed and replicated test statistics. obsTestStats <- calcObsTestStats(transData, nPCs, condProbs1) save(obsTestStats, file = "ObsTestStats.dat") plotTMeanSd(combinedChains, obsTestStats) plotTCorr(combinedChains, obsTestStats) # Back transform the parameters of the finite mixture model to the simplex. simplexModPar <- backTransform(gcData, nPCs, transData, combinedChains) save( simplexModPar, file = "SimplexModPar.dat") # Set the order in which the elements are plotted by using a vector. elementOrder <- c( "Sr", "U", "Y", "Nb", "La", "Ce", "Th", "Na", "Al", "Ga", "Be", "K", "Rb", "Ba", "Pb", "Cu", "Zn", "Mo", "Mg", "Sc", "Co", "Fe", "V", "Ni", "Cr", "Ca", "P", "Ti", "Li", "Mn", "Sn", "As", "Bi", "Cd", "In", "S", "Sb", "Tl", "W", "EE") # Plot the medians of the compositional means for both pdfs in the finite # mixture model. plotCompMeans(simplexModPar, elementOrder) # Calculate the the sample centers for all field samples from # the geochemical survey. simplexStats <- calcSimplexStats(gcData) save(simplexStats, file = "SimplexStats.dat") # Plot the translated compositional means. The translation is # calculated with the sample center for the current subset of field samples. plotTransCompMeans( simplexModPar, simplexStats, gcData, elementOrder) # Plot the scaled variation matrices. plotSqrtVarMatrices( simplexModPar, elementOrder, colorScale = "spectrum" ) # Plot the clusters on a map. map('state', regions = 'colorado', fill = TRUE, col = "gray60", border = "white") map.axes() plotClusters(gcData, condProbs1, symbolSizes = rep.int(2/3, 4)) # Partition the geochemical data according to the # conditional probabilities theSplits <- splitGcData(gcData, condProbs1, threshold = 0.10 ) # Create new directories and save the splits. In the default directory, # where Process1 is, create two more directories: Process11 and Process12. ### Beware: The next line overwrites gcData !!! gcData <- theSplits$gcData1 save( gcData, file = "..\\Process11\\gcData.dat" ) ### Beware: The next line overwrites gcData !!! gcData <- theSplits$gcData2 save( gcData, file = "..\\Process12\\gcData.dat" )