###########################################
# Example R scripts
#
# These scripts pertain to the processing presented in the User's Guide.
# So, the scripts must be edited for your situation.
#
###########################################


# Load some required libraries.
library(GcClust)
library(shiny)
library(shinystan)
library(sp)
library(maps)

# Set the working directory to Process1.
setwd("~/Process1")

# Save the geochemical data to a binary file, and load.
gcData <- CoGeochemData
save(gcData, file="gcData.dat")
load("gcData.dat")

# Plot the state of Colorado as a gray polygon, and add axes.
map('state', regions = 'colorado', fill = TRUE, col = "gray60", border = "white")
map.axes()

# Add the sample locations.
plot( gcData$concData, add = TRUE, pch = 16, cex = 1/3, col = "black")

cat(sprintf("The number of field samples = %d\n", nrow(gcData$concData@data)))

# Pre-process the geochemical data, first with the isometric log-ratio
# transform and then with the robust principal component transform.
transData <- transformGcData(gcData)
save(transData, file = "TransData.dat")

# Plot boxplots and violinplots of the principal components (may take a few seconds).
plotEdaDist(transData)

# Plot the correlation matrix and histogram of correlations (may take a few seconds).
plotEdaCorr(transData)

# Check the variances of the principal components using a scree plot.
plotEdaVar(transData)

# Set the number of principal components to use from the scree plot.
nPCs <- 22

# Load the compiled mixture model into the R session.
tmp <- normalizePath(path.package("GcClust"))
load(paste(tmp, "\\stan\\MixtureModel.bin", sep=""))

# Monte Carlo sampling of the posterior probability density function (pdf)
#
# In this example 7 of 8 available cores are used.
#
# NOTE: This step may require several hours, depending on the number of
# CPU cores, CPU processing speed, and the amount of data.
#

priorParams <- c(4, 3, 3, 2)
samplePars <- sampleFmm(transData, nPCs, sm, priorParams, nChainsPerCore = 5, nCores = 7)
save(samplePars, file = "SamplePars.dat")

# Check for within-chain label-switching.
plotSelectedTraces(samplePars)

# Select the chains and the switching.
# NOTE: This may take a few minutes!
plotPointStats(samplePars)

# You need to create a common-separated value file
# to note which chains are switched. In a file named "SelectedChains.csv"
# enter the following lines (not the "#" of course) and set
# "isSwitched" to "T" for chains that must be switched or to "F"
# for chains that must not be switched.
# Chain, isSwitched
# 1, F
# 2, F
# 3, F
# 4, F
# Save the file to the current working directory.

# Read the common-separated value file, switch and combine the chains.
selectedChains <- read.csv("SelectedChains.csv", header = TRUE,
                           stringsAsFactors = FALSE)
combinedChains <- combineChains(samplePars, selectedChains)
save(combinedChains, file = "CombinedChains.dat")

# Evaluate the convergence of the Monte Carlo sampling using "shinystan"
# to view various graphical and statistical summaries that indicate
# how well the sampling converged.
launch_shinystan(combinedChains)

# Calculate the conditional probability that a field sample is associated
# with probability density function (pdf) 1 in the finite mixture model.
condProbs1 <- calcCondProbs1(transData, nPCs, combinedChains)
save(condProbs1, file = "CondProbs1.dat")

# Compare observed and replicated test statistics.
obsTestStats <- calcObsTestStats(transData, nPCs, condProbs1)
save(obsTestStats, file = "ObsTestStats.dat")
plotTMeanSd(combinedChains, obsTestStats)
plotTCorr(combinedChains, obsTestStats)

# Back transform the parameters of the finite mixture model to the simplex.
simplexModPar <- backTransform(gcData, nPCs, transData, combinedChains)
save( simplexModPar, file = "SimplexModPar.dat")

# Set the order in which the elements are plotted by using a vector.
elementOrder <- c( "Sr", "U", "Y", "Nb", "La",
                   "Ce", "Th", "Na", "Al", "Ga",
                   "Be", "K", "Rb", "Ba", "Pb",
                   "Cu", "Zn", "Mo", "Mg", "Sc",
                   "Co", "Fe", "V", "Ni", "Cr",
                   "Ca", "P", "Ti", "Li", "Mn",
                   "Sn", "As", "Bi", "Cd", "In",
                   "S",  "Sb", "Tl", "W", "EE")

# Plot the medians of the compositional means for both pdfs in the finite
# mixture model.
plotCompMeans(simplexModPar, elementOrder)

# Calculate the the sample centers for all field samples from
# the geochemical survey.
simplexStats <- calcSimplexStats(gcData)
save(simplexStats, file = "SimplexStats.dat")

# Plot the translated compositional means. The translation is
# calculated with the sample center for the current subset of field samples.
plotTransCompMeans( simplexModPar, simplexStats, gcData, elementOrder)

# Plot the scaled variation matrices.
plotSqrtVarMatrices( simplexModPar, elementOrder, colorScale = "spectrum" )

# Plot the clusters on a map.
map('state', regions = 'colorado', fill = TRUE, col = "gray60", border = "white")
map.axes()

plotClusters(gcData, condProbs1, symbolSizes = rep.int(2/3, 4))

# Partition the geochemical data according to the
# conditional probabilities
theSplits <- splitGcData(gcData, condProbs1, threshold = 0.10 )

# Create new directories and save the splits. In the default directory,
# where Process1 is, create two more directories: Process11 and Process12.

### Beware: The next line overwrites gcData !!!
gcData <- theSplits$gcData1
save( gcData, file = "..\\Process11\\gcData.dat" )

### Beware: The next line overwrites gcData !!!
gcData <- theSplits$gcData2
save( gcData, file = "..\\Process12\\gcData.dat" )