library(gwsem)

# Read the phenotype data into R and look at the data
phenoData <- read.table("phenoData.txt", header=TRUE)
head(phenoData)

# This would be a great time to recode the data, transform it, and tell R if we have ordinal or binary indicators using mxFactor()
# The data for this example are simulated and continuous, and therefore, we will not be doing anything now, but if you would like 
# to chop the indicators up into binary or ordinal variables, this would be a great time to do it.


# The first thing that we would want to do is build a general model. 
# This model takes a series of arguments: 

                                                                                     # You must tell GW-SEM:
addFac <- buildOneFac(phenoData = phenoData,                                         # what the data object is (which you read in above)
                     itemNames = c("tobacco", "cannabis", "alcohol"),                # what the items of the latent factor are
                     covariates=c('pc1','pc2','pc3','pc4','pc5'),                    # what covariates that you want to include in the analysis
                     fitfun = "WLS")                                                 # and the fit function that you would like to use (WLS is much faster than ML)

# You can take this object (addFac) which is technically an OpenMx model, and simply fit it using mxRun() 
addFacFit <- mxRun(addFac)
summary(addFacFit)

# This is strongly advised, as it is a great time to learn if your model is being specified in a way that you didn't expect or that 
# the model is not giving you the answers that you are expecting

# Now, provided that the model looks reasonable, you can plug the model that you have built into the GWAS function
                                                                                     # To fit this function, you must tell GW-SEM :
GWAS(model = addFac,                                                                 # what model object you would like to fit
	snpData = 'example.pgen',                                                        # that path to the snpData file.
	out="latFac.log",                                                                # the file that you would like to save the full results into
	SNP=1:200)                                                                       # the index of the snps (how many) you would like to fit

# Note about snpData: The path to your snpData will likely include switching to a different directory (as you will likely do your analysis in a different
# folder than your SNP data). All you need to do is point to the data using relative paths. Further, it is able to take plink bed/bim/fam or pgen/psam/pvar 
# data or bgen data (oxford format)

# Note about SNP: This can be used to run a limited number of SNP (i.e. not the whole snp file). This is particularly useful if you would like to run chop up a 
# chr into several parts without cutting you actual genotype data into seperate files.

# This will take a while and frequently be done on a cluster.  If you chop up this script and remove the comments and insert you data, you can use it on your cluster


# The next step is to read the data in. While you are unlikely to want to read all of the output into R, it is useful to do this on a small test GWAS analysis
# to ensure that you are getting reasonable results.

FullResult <- read.delim("latFac.log")                            # This is didactically useful, but it contains much more information than most people want


# Then, we can read the results into R for a specific parameter. This function takes takes two arguments: the path to the data and the column in the results 
# file for the parameter that you want to examine.

succint <- loadResults(path = "latFac.log", focus =  "snp_to_F")

# I will give you a way to read several files into a single R object 

# Lastly, you can plot the results
plot(succint)


### The Residuals Model

# The process is very similar for the Latent factor model except that we are regressing the individual items on the SNP

# Again, the first thing that we would want to do is build a general model. 
# The function tells GW-SEM that you want to run the residuals model. This model takes a series of arguments: 
                                                                                       # You must tell GW-SEM:
addFacRes <- buildOneFacRes(phenoData,                                                 # what the data object is (which you read in above)
                     c("tobacco", "cannabis", "alcohol"),                              # what the items of the latent factor are
                     covariates=c('pc1','pc2','pc3','pc4','pc5'),                      # what covariates that you want to include in the analysis
                     fitfun = "WLS")                                                   # and the fit function that you would like to use (WLS is much faster than ML)


# Again, You can take this object (addFac) which is technically an OpenMx model, and simply fit it using mxRun() 
addFacResFit <- mxRun(addFacRes)
summary(addFacResFit)


# Then you will use the same function that we used for the latent variable model, with a different model object

GWAS(model = addFacRes, snpData = 'example.pgen', out="facRes.log", SNP=1:200)                 

# It is instructive to look at the results again, but this might be easier to do in linux rather than R
# This is how you load it into R
ResResult <- read.delim("facRes.log")                            


# Now that we have run the residual's model, we have multiple parameters that we want to load into R (Three in this case)
# The function to do this is the same as for one parameter, but you need to do it several times (once for each paramter). This gives several R objects.

succintTob <- loadResults(path = "facRes.log", focus =  "snp_to_tobacco")
succintCan <- loadResults(path = "facRes.log", focus =  "snp_to_cannabis")
succintAlc <- loadResults(path = "facRes.log", focus =  "snp_to_alcohol")

# Now we can plot all the residual manhattan plots

plot(succintTob)
plot(succintCan)
plot(succintAlc)


# Additional considerations: 

# Because we are working with numerous chromosomes, which we have chopped up into different chunks, we have several log files for each chromosome. For example, 
# we may have a set of log files such as: fac1a.log, fac1b.log, fac1c.log, fac1d.log, fac1e.log, fac1f.log, fac1g.log, fac1h.log.
# To simplify things, we can construct a list of file names for each chr with the following code.

c1  <- paste0(paste0("fac1",  letters)[1:8], ".log")
c2  <- paste0(paste0("fac2",  letters)[1:9], ".log")
c3  <- paste0(paste0("fac3",  letters)[1:7], ".log")
c4  <- paste0(paste0("fac4",  letters)[1:8], ".log")
c5  <- paste0(paste0("fac5",  letters)[1:7], ".log")
c6  <- paste0(paste0("fac6",  letters)[1:7], ".log")
c7  <- paste0(paste0("fac7",  letters)[1:6], ".log")
c8  <- paste0(paste0("fac8",  letters)[1:6], ".log")
c9  <- paste0(paste0("fac9",  letters)[1:5], ".log")
c10 <- paste0(paste0("fac10", letters)[1:5], ".log") 
c11 <- paste0(paste0("fac11", letters)[1:5], ".log")
c12 <- paste0(paste0("fac12", letters)[1:5], ".log")
c13 <- paste0(paste0("fac13", letters)[1:4], ".log")
c14 <- paste0(paste0("fac14", letters)[1:4], ".log")
c15 <- paste0(paste0("fac15", letters)[1:3], ".log")
c16 <- paste0(paste0("fac16", letters)[1:3], ".log")
c17 <- paste0(paste0("fac17", letters)[1:3], ".log")
c18 <- paste0(paste0("fac18", letters)[1:3], ".log")
c19 <- paste0(paste0("fac19", letters)[1:3], ".log")
c20 <- paste0(paste0("fac20", letters)[1:2], ".log")
c21 <- paste0(paste0("fac21", letters)[1:2], ".log")
c22 <- paste0(paste0("fac22", letters)[1:2], ".log")

# Then we can plug these files into the loadResults function so that all the data is loaded into a single object.

res <- loadResults(c(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,
	                 c11, c12, c13, c14, c15, c16, c17, c18, c19, c20,
					 c21, c22), "snp_to_F")