require(GenomicSEM)

######################################
##############PART 1##################
##European Continuous Traits Example###
#####################################
setwd("EUR")

#####Step 1: Munge the data####
files<-c("panUKB_BMI_EUR_chr1.txt", "Yengo_Height_EUR_chr1.txt")

#define the reference file being used to allign alleles across summary stats
#here we are using hapmap3
hm3<-"eur_w_ld_chr/w_hm3.snplist"

#name the traits 
trait.names<-c("BMI","Height")

#list the sample sizes. 
#Yengo has sample size in the file 
#but pan UKB does not so we provide that directly
N<-c(419163, NA)

#define the imputation quality filter
info.filter=0.9

#define the MAF filter
maf.filter=0.01

#run munge
munge(files=files,hm3=hm3,trait.names=trait.names,N=N,info.filter=info.filter,maf.filter=maf.filter)

###Step 2: run LDSC
#traits = the name of the .sumstats.gz traits produced by munge
traits<-c("BMI.sumstats.gz","Height.sumstats.gz")

##ld = folder of LD scores 
ld <- "eur_w_ld_chr/"

#wld = folder of LD scores
wld <- "eur_w_ld_chr/"

#sample.prev = the proportion of cases to total sample size. For quantitative traits list NA
sample.prev<-c(NA,NA)

#population.prev = the population lifetime prevalence of the traits. For quantitative traits list NA
population.prev<-c(NA,NA)

#trait.names = optional fifth argument to list trait names so they can be named in your model
trait.names<-c("BMI", "Height")

LDSC_EUR <- ldsc(traits=traits,sample.prev=sample.prev,population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names)
save(LDSC_EUR, file = "LDSC_EUR.RData")

#genetic covariance matrix
LDSC_EUR$S

#genetic correlation matrix
cov2cor(LDSC_EUR$S)

#lets grab teh standard errors from the V matrix
k<-nrow(LDSC_EUR$S)
SE<-matrix(0, k, k)
SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(LDSC_EUR$V))

#matrix of Z-stats
Z<-LDSC_EUR$S/SE

#matrix of p-values
P<-2*pnorm(Z,lower.tail=FALSE)

#create a genetic heatmap 
require(corrplot)
rownames(LDSC_EUR$S)<-colnames(LDSC_EUR$S)

corrplot(corr = cov2cor(LDSC_EUR$S),
         method = "color",
         addCoef.col = "dark grey",
         add = F,
         bg = "white",
         diag = T,
         outline = T,
         mar = c(0,0,2,0),
         number.cex=2,
         cl.pos = "b",
         cl.ratio = 0.125,
         cl.align.text = "l",
         cl.offset = 0.2,
         tl.srt=45,
         tl.pos = "lt",
         tl.offset=0.2,
         tl.col = "black",
         pch.col = "white",
         addgrid.col = "black",
         xpd = T,
         tl.cex=1.3,
         is.corr=TRUE,
         title = "European")

rm(list = ls())


#########################
######PART 2##############
##East Asian Example###
##########################
setwd("../EAS/")

#####Step 1: Munge the code####
files<-c("BMI_EAS_chr1.txt","Yengo_Height_EAS_chr1.txt")

#define the reference file being used to allign alleles across summary stats
#here we are using hapmap3
hm3<-"eas_ldscores/w_hm3.snplist"

#name the traits 
trait.names<-c("BMI","Height")

#list the sample sizes. 
#Yengo has sample size in the file 
#but pan UKB does not so we enter it manually
N<-c(163835, NA)

#definte the imputation quality filter
info.filter=0.9

#define the MAF filter
maf.filter=0.01

#run munge
munge(files=files,hm3=hm3,trait.names=trait.names,N=N,info.filter=info.filter,maf.filter=maf.filter)

###Step 2: run LDSC

#traits = the name of the .sumstats.gz traits produced by munge
traits<-c("BMI.sumstats.gz","Height.sumstats.gz")

##ld = folder of LD scores 
ld <- "eas_ldscores/"

#wld = folder of LD scores
wld <- "eas_ldscores/"

#sample.prev = the proportion of cases to total sample size. For quantitative traits list NA
sample.prev<-c(NA,NA)

#population.prev = the population lifetime prevalence of the traits. For quantitative traits list NA
population.prev<-c(NA,NA)

#trait.names = optional fifth argument to list trait names so they can be named in your model
trait.names<-c("BMI", "Height")

LDSC_EAS <- ldsc(traits=traits,sample.prev=sample.prev,population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names)
save(LDSC_EAS, file = "LDSC_EAS.RData")

#genetic covariance matrix
LDSC_EAS$S

#genetic correlation matrix
cov2cor(LDSC_EAS$S)

#standard errors
k<-nrow(LDSC_EAS$S)
SE<-matrix(0, k, k)
SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(LDSC_EAS$V))

#matrix of Z-stats
Z<-LDSC_EAS$S/SE

#matrix of p-values
P<-2*pnorm(Z,lower.tail=FALSE)

#create corrplot again
rownames(LDSC_EAS$S)<-colnames(LDSC_EAS$S)

corrplot(corr = cov2cor(LDSC_EAS$S),
         method = "color",
         addCoef.col = "dark grey",
         add = F,
         bg = "white",
         diag = T,
         outline = T,
         mar = c(0,0,2,0),
         number.cex=2,
         cl.pos = "b",
         cl.ratio = 0.125,
         cl.align.text = "l",
         cl.offset = 0.2,
         tl.srt=45,
         tl.pos = "lt",
         tl.offset=0.2,
         tl.col = "black",
         pch.col = "white",
         addgrid.col = "black",
         xpd = T,
         tl.cex=1.3,
         is.corr=TRUE,
         title = "East Asian")

rm(list = ls())


######################################
##############PART 3##################
##European Binary (case/control) Traits Example###
#####################################
setwd("../EUR/")

#####Step 1: Munge the data####
files<-c("SCZ_chr1.txt", "BIP_chr1.txt")

#define the reference file being used to allign alleles across summary stats
#here we are using hapmap3
hm3<-"eur_w_ld_chr/w_hm3.snplist"

#name the traits 
trait.names<-c("SCZ","BIP")

#list the sample sizes. 
#we write NA here as these files already have Neff as columns
N<-c(NA, NA)

#define the imputation quality filter
info.filter=0.9

#define the MAF filter
maf.filter=0.01

#run munge
munge(files=files,hm3=hm3,trait.names=trait.names,N=N,info.filter=info.filter,maf.filter=maf.filter)

###Step 2: run LDSC
#traits = the name of the .sumstats.gz traits produced by munge
traits<-c("SCZ.sumstats.gz","BIP.sumstats.gz")

##ld = folder of LD scores 
ld <- "eur_w_ld_chr/"

#wld = folder of LD scores
wld <- "eur_w_ld_chr/"

#sample.prev: enter as 0.5 because used sum of effecitve N, 
#that accounts for sample ascertainment
sample.prev<-c(.5,.5)

#population.prev = the population lifetime prevalence of the traits. 
population.prev<-c(.01,.02)

#trait.names = optional fifth argument to list trait names so they can be named in your model
trait.names<-c("SCZ", "BIP")

LDSC_SCZ_BIP <- ldsc(traits=traits,sample.prev=sample.prev,population.prev=population.prev, ld=ld, wld=wld,trait.names=trait.names)
save(LDSC_SCZ_BIP , file = "LDSC_SCZ_BIP .RData")

#genetic covariance matrix
LDSC_SCZ_BIP$S

#genetic correlation matrix
cov2cor(LDSC_SCZ_BIP$S)

#lets grab teh standard errors from the V matrix
k<-nrow(LDSC_SCZ_BIP$S)
SE<-matrix(0, k, k)
SE[lower.tri(SE,diag=TRUE)] <-sqrt(diag(LDSC_SCZ_BIP$V))

#matrix of Z-stats
Z<-LDSC_SCZ_BIP$S/SE

#matrix of p-values
P<-2*pnorm(Z,lower.tail=FALSE)

#create a genetic heatmap 
rownames(LDSC_SCZ_BIP$S)<-colnames(LDSC_SCZ_BIP$S)

corrplot(corr = cov2cor(LDSC_SCZ_BIP$S),
         method = "color",
         addCoef.col = "dark grey",
         add = F,
         bg = "white",
         diag = T,
         outline = T,
         mar = c(0,0,2,0),
         number.cex=2,
         cl.pos = "b",
         cl.ratio = 0.125,
         cl.align.text = "l",
         cl.offset = 0.2,
         tl.srt=45,
         tl.pos = "lt",
         tl.offset=0.2,
         tl.col = "black",
         pch.col = "white",
         addgrid.col = "black",
         xpd = T,
         tl.cex=1.3,
         is.corr=TRUE,
        title = "SCZ / BIP")

rm(list = ls())