require(GenomicSEM) ###################################### ##############PART 1################## ##European Continuous Traits Example### ##################################### setwd("EUR") #####Step 1: Munge the data#### files<-c("panUKB_BMI_EUR_chr1.txt", "Yengo_Height_EUR_chr1.txt") #define the reference file being used to allign alleles across summary stats #here we are using hapmap3 hm3<-"eur_w_ld_chr/w_hm3.snplist" #name the traits trait.names<-c("BMI","Height") #list the sample sizes. #Yengo has sample size in the file #but pan UKB does not so we provide that directly N<-c(419163, NA) #define the imputation quality filter info.filter=0.9 #define the MAF filter maf.filter=0.01 #run munge munge(files=files,hm3=hm3,trait.names=trait.names,N=N,info.filter=info.filter,maf.filter=maf.filter) ###Step 2: run LDSC #traits = the name of the .sumstats.gz traits produced by munge traits<-c("BMI.sumstats.gz","Height.sumstats.gz") ##ld = folder of LD scores ld <- "eur_w_ld_chr/" #wld = folder of LD scores wld <- "eur_w_ld_chr/" #sample.prev = the proportion of cases to total sample size. For quantitative traits list NA sample.prev<-c(NA,NA) #population.prev = the population lifetime prevalence of the traits. For quantitative traits list NA population.prev<-c(NA,NA) #trait.names = optional fifth argument to list trait names so they can be named in your model trait.names<-c("BMI", "Height") LDSC_EUR <- ldsc(traits=traits,sample.prev=sample.prev,population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names) save(LDSC_EUR, file = "LDSC_EUR.RData") #genetic covariance matrix LDSC_EUR$S #genetic correlation matrix cov2cor(LDSC_EUR$S) #lets grab teh standard errors from the V matrix k<-nrow(LDSC_EUR$S) SE<-matrix(0, k, k) SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(LDSC_EUR$V)) #matrix of Z-stats Z<-LDSC_EUR$S/SE #matrix of p-values P<-2*pnorm(Z,lower.tail=FALSE) #create a genetic heatmap require(corrplot) rownames(LDSC_EUR$S)<-colnames(LDSC_EUR$S) corrplot(corr = cov2cor(LDSC_EUR$S), method = "color", addCoef.col = "dark grey", add = F, bg = "white", diag = T, outline = T, mar = c(0,0,2,0), number.cex=2, cl.pos = "b", cl.ratio = 0.125, cl.align.text = "l", cl.offset = 0.2, tl.srt=45, tl.pos = "lt", tl.offset=0.2, tl.col = "black", pch.col = "white", addgrid.col = "black", xpd = T, tl.cex=1.3, is.corr=TRUE, title = "European") rm(list = ls()) ######################### ######PART 2############## ##East Asian Example### ########################## setwd("../EAS/") #####Step 1: Munge the code#### files<-c("BMI_EAS_chr1.txt","Yengo_Height_EAS_chr1.txt") #define the reference file being used to allign alleles across summary stats #here we are using hapmap3 hm3<-"eas_ldscores/w_hm3.snplist" #name the traits trait.names<-c("BMI","Height") #list the sample sizes. #Yengo has sample size in the file #but pan UKB does not so we enter it manually N<-c(163835, NA) #definte the imputation quality filter info.filter=0.9 #define the MAF filter maf.filter=0.01 #run munge munge(files=files,hm3=hm3,trait.names=trait.names,N=N,info.filter=info.filter,maf.filter=maf.filter) ###Step 2: run LDSC #traits = the name of the .sumstats.gz traits produced by munge traits<-c("BMI.sumstats.gz","Height.sumstats.gz") ##ld = folder of LD scores ld <- "eas_ldscores/" #wld = folder of LD scores wld <- "eas_ldscores/" #sample.prev = the proportion of cases to total sample size. For quantitative traits list NA sample.prev<-c(NA,NA) #population.prev = the population lifetime prevalence of the traits. For quantitative traits list NA population.prev<-c(NA,NA) #trait.names = optional fifth argument to list trait names so they can be named in your model trait.names<-c("BMI", "Height") LDSC_EAS <- ldsc(traits=traits,sample.prev=sample.prev,population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names) save(LDSC_EAS, file = "LDSC_EAS.RData") #genetic covariance matrix LDSC_EAS$S #genetic correlation matrix cov2cor(LDSC_EAS$S) #standard errors k<-nrow(LDSC_EAS$S) SE<-matrix(0, k, k) SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(LDSC_EAS$V)) #matrix of Z-stats Z<-LDSC_EAS$S/SE #matrix of p-values P<-2*pnorm(Z,lower.tail=FALSE) #create corrplot again rownames(LDSC_EAS$S)<-colnames(LDSC_EAS$S) corrplot(corr = cov2cor(LDSC_EAS$S), method = "color", addCoef.col = "dark grey", add = F, bg = "white", diag = T, outline = T, mar = c(0,0,2,0), number.cex=2, cl.pos = "b", cl.ratio = 0.125, cl.align.text = "l", cl.offset = 0.2, tl.srt=45, tl.pos = "lt", tl.offset=0.2, tl.col = "black", pch.col = "white", addgrid.col = "black", xpd = T, tl.cex=1.3, is.corr=TRUE, title = "East Asian") rm(list = ls()) ###################################### ##############PART 3################## ##European Binary (case/control) Traits Example### ##################################### setwd("../EUR/") #####Step 1: Munge the data#### files<-c("SCZ_chr1.txt", "BIP_chr1.txt") #define the reference file being used to allign alleles across summary stats #here we are using hapmap3 hm3<-"eur_w_ld_chr/w_hm3.snplist" #name the traits trait.names<-c("SCZ","BIP") #list the sample sizes. #we write NA here as these files already have Neff as columns N<-c(NA, NA) #define the imputation quality filter info.filter=0.9 #define the MAF filter maf.filter=0.01 #run munge munge(files=files,hm3=hm3,trait.names=trait.names,N=N,info.filter=info.filter,maf.filter=maf.filter) ###Step 2: run LDSC #traits = the name of the .sumstats.gz traits produced by munge traits<-c("SCZ.sumstats.gz","BIP.sumstats.gz") ##ld = folder of LD scores ld <- "eur_w_ld_chr/" #wld = folder of LD scores wld <- "eur_w_ld_chr/" #sample.prev: enter as 0.5 because used sum of effecitve N, #that accounts for sample ascertainment sample.prev<-c(.5,.5) #population.prev = the population lifetime prevalence of the traits. population.prev<-c(.01,.02) #trait.names = optional fifth argument to list trait names so they can be named in your model trait.names<-c("SCZ", "BIP") LDSC_SCZ_BIP <- ldsc(traits=traits,sample.prev=sample.prev,population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names) save(LDSC_SCZ_BIP , file = "LDSC_SCZ_BIP .RData") #genetic covariance matrix LDSC_SCZ_BIP$S #genetic correlation matrix cov2cor(LDSC_SCZ_BIP$S) #lets grab teh standard errors from the V matrix k<-nrow(LDSC_SCZ_BIP$S) SE<-matrix(0, k, k) SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(LDSC_SCZ_BIP$V)) #matrix of Z-stats Z<-LDSC_SCZ_BIP$S/SE #matrix of p-values P<-2*pnorm(Z,lower.tail=FALSE) #create a genetic heatmap rownames(LDSC_SCZ_BIP$S)<-colnames(LDSC_SCZ_BIP$S) corrplot(corr = cov2cor(LDSC_SCZ_BIP$S), method = "color", addCoef.col = "dark grey", add = F, bg = "white", diag = T, outline = T, mar = c(0,0,2,0), number.cex=2, cl.pos = "b", cl.ratio = 0.125, cl.align.text = "l", cl.offset = 0.2, tl.srt=45, tl.pos = "lt", tl.offset=0.2, tl.col = "black", pch.col = "white", addgrid.col = "black", xpd = T, tl.cex=1.3, is.corr=TRUE, title = "SCZ / BIP") rm(list = ls())