library(gwsem) source("setup.R") # Read the phenotype data into R phenoData <- read.table(paste0(dir,"plink2.psam"), header=TRUE) # This would be an ideal place to clean the data and look at it more carefully covariates <- paste0('pc',1:numPCs) # ------------- GWAS on the latent factor # Construct the model that we would want to run for the GWAS (one factor model) latFac <- buildOneFac(phenoData, itemNames = indicatorNames, covariates = covariates) head(latFac$data$observed$snp) # fake data here latFac <- mxRun(latFac) # Run the model using mxRun to make sure that things look correct. # Does the model look reasonable with fake snp data? print(summary(latFac)) latFac <- GWAS(model=latFac, # he model that you want to run a GWAS on snpData=paste0(dir, 'plink2.pgen'), # Specify the path to the genetic data out = 'result2.log') # Where to write the results latResults <- loadResults('result2.log', focus = "snp_to_F", # deal with sign indeterminancy signAdj = "lambda_cannabis") print(head(latResults[order(-abs(latResults$Z)),])) # Print the most extreme z-scores maxLogP <- max(-log10(latResults$P)) # needed to make the best plot # ------------- GWAS on the indicator residuals resid <- buildOneFacRes(phenoData, itemNames = indicatorNames, covariates=covariates) residTest <- mxRun(resid) # Check to make sure that the model runs summary(residTest) # Does it? resid <- GWAS(resid, paste0(dir, 'plink2.pgen'), # run the GWAS out = 'result3.log') # Load the results for each variable in indicatorNames result <- list() for (ix in indicatorNames) { result[[ix]] <- loadResults('result3.log', focus = paste0('snp_to_', ix)) maxLogP <- max(c(maxLogP, -log10(result[[ix]]$P))) } ## Constructing Manhattan plots to display the results yrange <- c(0,.5+maxLogP) # Use the maximum possible -log10 p-value for the y-axis # par(new=FALSE) # plot(latResults, ylim=yrange) # Construct a Manhatttan Plot for the latent variable GWAS par(new=TRUE) # Don't start a new plot plot(result[['tobacco']], col="red", ylim=yrange) # Over top of the previous plot, plot the tobacco manhattan plot par(new=TRUE) # plot(result[['cannabis']], col="darkgreen", ylim=yrange) # plot the cannabis manhattan plot par(new=TRUE) # plot(result[['alcohol']], col="purple2", ylim=yrange) # plot the alcohol manhattan plot legend("topleft", c("Latent Factor", "Tobacco", "Cannabis", "Alcohol") , # Give it a legend col = c("black", "red", "darkgreen", "purple2"), pch=15, title= "Order of Plotting", bty = "n") # On your own, reverse the plot so that alcohol is plotted first, # followed by cannabis, then tobacco and finally the latent variable. ## Did the simulation work as expected? if (FALSE) { # Did we get hits where expected? hitTable <- read.table(paste0(dir,"hits.csv"), header=FALSE) hits <- hitTable$V2 names(hits) <- hitTable$V1 latResults[match(hits, latResults$MxComputeLoop1),] latResults1 <- result[[ indicatorNames[1] ]] latResults1[match(hits, latResults1$MxComputeLoop1),] latResults1 <- result[[ indicatorNames[2] ]] latResults1[match(hits, latResults1$MxComputeLoop1),] latResults1 <- result[[ indicatorNames[3] ]] latResults1[match(hits, latResults1$MxComputeLoop1),] }