library(gwsem)
source("setup.R")

# Read the phenotype data into R
phenoData <- read.table(paste0(dir,"plink2.psam"), header=TRUE)

# This would be an ideal place to clean the data and look at it more carefully

covariates <- paste0('pc',1:numPCs)

# ------------- GWAS on the latent factor

# Construct the model that we would want to run for the GWAS (one factor model)
latFac <- buildOneFac(phenoData,
                      itemNames = indicatorNames,
                      covariates = covariates)

head(latFac$data$observed$snp) # fake data here

latFac <- mxRun(latFac)                                                    # Run the model using mxRun to make sure that things look correct.

#  Does the model look reasonable with fake snp data?
print(summary(latFac))

latFac <- GWAS(model=latFac,                                                    # he model that you want to run a GWAS on
               snpData=paste0(dir, 'plink2.pgen'),                              # Specify the path to the genetic data
               out = 'result2.log')                                                 # Where to write the results
latResults <- loadResults('result2.log', focus = "snp_to_F",
                          # deal with sign indeterminancy
                          signAdj = "lambda_cannabis")
print(head(latResults[order(-abs(latResults$Z)),]))                             # Print the most extreme z-scores 
maxLogP <- max(-log10(latResults$P))                                            # needed to make the best plot

# ------------- GWAS on the indicator residuals

resid <- buildOneFacRes(phenoData, itemNames = indicatorNames,
                        covariates=covariates)
residTest <- mxRun(resid)                                                       # Check to make sure that the model runs
summary(residTest)                                                              # Does it?

resid <- GWAS(resid, paste0(dir, 'plink2.pgen'),                                # run the GWAS
              out = 'result3.log')

# Load the results for each variable in indicatorNames
result <- list()
for (ix in indicatorNames) {
  result[[ix]] <- loadResults('result3.log',
                              focus = paste0('snp_to_', ix))
  maxLogP <- max(c(maxLogP, -log10(result[[ix]]$P)))
}

## Constructing Manhattan plots to display the results

yrange <- c(0,.5+maxLogP)                                                       # Use the maximum possible -log10 p-value for the y-axis
                                                                                # 
par(new=FALSE)                                                                  #    
plot(latResults, ylim=yrange)                                                   #  Construct a Manhatttan Plot for the latent variable GWAS
par(new=TRUE)                                                                   # Don't start a new plot
plot(result[['tobacco']], col="red", ylim=yrange)                               # Over top of the previous plot, plot the tobacco manhattan plot
par(new=TRUE)                                                                   # 
plot(result[['cannabis']], col="darkgreen", ylim=yrange)                        # plot the cannabis manhattan plot
par(new=TRUE)                                                                   # 
plot(result[['alcohol']], col="purple2", ylim=yrange)                           # plot the alcohol manhattan plot


legend("topleft",   c("Latent Factor", "Tobacco", "Cannabis", "Alcohol") ,    # Give it a legend
       col = c("black", "red", "darkgreen", "purple2"), 
       pch=15, title= "Order of Plotting",  bty = "n")


# On your own, reverse the plot so that alcohol is plotted first,
# followed by cannabis, then tobacco and finally the latent variable.


## Did the simulation work as expected?

if (FALSE) {
  # Did we get hits where expected?
  hitTable <- read.table(paste0(dir,"hits.csv"), header=FALSE)
  hits <- hitTable$V2
  names(hits) <- hitTable$V1
  
  latResults[match(hits, latResults$MxComputeLoop1),]
  latResults1 <- result[[ indicatorNames[1] ]]
  latResults1[match(hits, latResults1$MxComputeLoop1),]
  latResults1 <- result[[ indicatorNames[2] ]]
  latResults1[match(hits, latResults1$MxComputeLoop1),]
  latResults1 <- result[[ indicatorNames[3] ]]
  latResults1[match(hits, latResults1$MxComputeLoop1),]
}