############################################## # Run Common Factor Model # ############################################## #STEP 0: load in GenomicSEM #load in the devtools package to load R packages from github #install.packages("devtools") #library(devtools) #install the package #install_github("MichelNivard/GenomicSEM") #load in the package require(GenomicSEM) getwd() #load in the example ldsc objects that we will use for the practical setwd("/home/andrew/GenomicSEM_BoulderWorkshop2020/") load("GenomicSEMPractical.RData") #STEP 1: MUNGE THE FILES (done for you already) #Takes four necessary arguments: #1. files = the name of the summary statistics files #setwd("~/Downloads/MichiganWorkshop_GenomicSEMFiles/") #files<-c("SCZ_HM3.txt", "BIP_HM3.txt", "MDD_HM3.txt") #2. hm3 = the name of the reference file to use for alligning effects to same ref allele across traits #can be found on our github #hm3<-"w_hm3.noMHC.snplist" #3. trait.names = names used to create the .sumstats.gz output files #trait.names<-c("SCZ_HM3","BIP_HM3","MDD_HM3") #4. N = total sample size for traits #N<-c(105318, 16731, 173005) #Run the munge function. This will create three .sumstats.gz files (e.g., SCZ_HM3.sumstats.gz). #munge(files=files,hm3=hm3, trait.names=trait.names, N=N) #STEP 2: RUN LD-SCORE REGRESSION (also done) #Takes five necessary arguments: #1. traits = the name of the .sumstats.gz traits #traits<- c("SCZ_HM3.sumstats.gz", "BIP_HM3.sumstats.gz", "MDD_HM3.sumstats.gz") #2. sample.prev = the proportion of cases to total sample size. For quantitative traits list NA #sample.prev <- c(.39,.45,.35) #3. population.prev = the population lifetime prevalence of the traits. For quantitative traits list NA #population.prev <- c(.01,.01,.16) ##4. ld = folder of LD scores #can be found on our github #ld <- "eur_w_ld_chr/" #5. wld = folder of LD scores #can be found on our github #wld <- "eur_w_ld_chr/" #6. trait.names = optional sixth argument to list trait names so they can be named in your model #trait.names<-c("SCZ", "BIP", "MDD") #Run the ldsc function #PSYCH_COV<-ldsc(traits=traits, sample.prev=sample.prev, population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names) ##in many cases may want to save the data for later use. Example code provided here #save(PSYCH_COV, file="PSYCH.RData") #STEP 3: ESTIMATE THE COMMON FACTOR MODEL #requires only one necessary argument: #1. covstruc = the output from the ldsc function covstruc<-PSYCH_COV #an optional second argument can be provided for the estimation method estimation<-"DWLS" #run the commonfactor model below PFactor <- commonfactor(covstruc=covstruc,estimation=estimation) #Print PFactor results PFactor$results ############################################## # Run User Specified Model: Part 1 # ############################################## #STEP 3: SPECIFY AND RUN USER MODEL #Takes two necessary arguments: #1. covstruc = the output from multivariable ldsc #in this example = ldsc results for Schizophrenia, Bipolar, MDD, EA, and Insomnia covstruc<-PRAC_COV #2. model = the user specified model ##delete to write your own!! MY.model<-"F1=~NA*SCZ+BIP+MDD F1~~1*F1 INSOM~F1 EA~INSOM" #estimation = an optional third argument specifying the estimation method to use estimation<-"DWLS" #std.lv = optional fourth argument specifying whether variances of latent variables should be set to 1 std.lv=FALSE #Run your model YourModel<- usermodel(covstruc=covstruc, model=MY.model,estimation=estimation,std.lv=std.lv) #print the results of your model YourModel$results #print the model fit of your model YourModel$modelfit ############################################## # Multivariate GWAS in Genomic SEM # ############################################## #Note that Steps 1 and 2 mirror those from the first example #STEP 3: PREPARE SUMMARY STATISTICS FOR MULTIVARIATE GWAS #Takes four necessary arguments: #1. files = the name of the summary statistics file ##**note that these are drastically reduced subsets of SNPs for the practical only files<-c("SCZ_100.txt", "BIP_100.txt", "MDD_100.txt") #2. ref = the name of the reference file used to obtain SNP MAF #**note again that this is a drastically reduced subset of SNPs ref="reference.1000G.subset.txt" #3. trait.names = the name of the files to be used in trait.names=c("SCZ","BIP","MDD") #4. se.logit = whether the standard errors are on an logistic scale se.logit<-c(T,T,T) #run the sumstats function below p_sumstats <- sumstats(files=files,ref=ref ,trait.names=trait.names,se.logit=se.logit) #STEP 4a: RUN THE MULTIVARIATE GWAS #commonfactorGWAS takes only two necessary arguments #1. covstruc = the output from the ldsc function covstruc<-PSYCH_COV #2. SNPs = output from sumstats function SNPs<-p_sumstats #3. estimation = optional third argument specifying estimation method to be used estimation<-"DWLS" #4. parallel = optional argument specifying whether it should be run in parallel #set to FALSE here just for the practical parallel<-FALSE #5. SNPSE = optional argument specifying value of standard error for SNP SNPSE<-.005 #run the multivariate GWAS below pfactor_GWAS<-commonfactorGWAS(covstruc=covstruc, SNPs=SNPs, estimation = estimation,parallel=parallel,SNPSE=SNPSE) ##print the first five rows of the output pfactor_GWAS[1:5,] ##look at fail messages (0 = good to go) table(pfactor_GWAS$fail) #look at warning messages table(pfactor_GWAS$warning) #STEP 4b: RUN A USER SPECIFIED MULTIVARIATE GWAS #userGWAS takes three necessary arguments: #1. covstruc = the output from the ldsc function covstruc<-PSYCH_COV #2. SNPs = output from sumstats function SNPs<-p_sumstats #3. model = the model to be run #going to troubleshoot estimated ov variances are negative #by adding model constraint for all residuals to be above 0 model<-"F1=~SCZ+BIP+MDD F1~SNP SCZ~~a*SCZ BIP~~b*BIP MDD~~c*MDD a > .001 b > .001 c > .001" #4. modelchi = optional argument whether you want model chi-square for the individual model #default = FALSE modelchi<-FALSE #5. estimation = optional argument specifying estimation method to be used estimation<-"DWLS" #6. sub = optional argument specifying component of model output to be saved sub<-"F1~SNP" #7. SNPSE = optional argument specifying value of standard error for SNP SNPSE<-.005 #8. parallel = optional argument specifying whether it should be run in parallel #set to FALSE here just for the practical parallel<-FALSE #run the multivariate GWAS below pfactor_GWAS2<-userGWAS(covstruc=covstruc, SNPs=SNPs, model=model,modelchi=modelchi, estimation = estimation,sub=sub,SNPSE=SNPSE,parallel=parallel) ##print the first five rows of the userGWAS output pfactor_GWAS2[[1]][1:5,] ##see if we solved the warning problem table(pfactor_GWAS2[[1]]$warning) ###IF theres time load("Anthro_LDSC.RData") colnames(anthro$S) covstruc<-anthro #2. model = the user specified model anthro.model<-"" #estimation = an optional third argument specifying the estimation method to use estimation<-"DWLS" #std.lv = optional fourth argument specifying whether variances of latent variables should be set to 1 std.lv=FALSE #Run your model AnthroModel<- usermodel(covstruc=covstruc, model=anthro.model,estimation=estimation,std.lv=std.lv)