require(data.table) require(R.utils) require(MungeSumstats) require(readr) require(dplyr) ###################################### ##############PART 1################## ##European Continuous Traits Example### ##################################### ################################ ###########Yengo 2022: Height### ################################ #downloaded file from: https://www.joelhirschhornlab.org/giant-consortium-results Height<-fread("GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_EUR",data.table=FALSE) #convert to MAF Height$MAF<-ifelse(Height$EFFECT_ALLELE_FREQ > 0.5, 1-Height$EFFECT_ALLELE_FREQ, Height$EFFECT_ALLELE_FREQ) #subset to MAF > 1% Height<-subset(Height, Height$MAF > 0.01) #make SNPID null due to duplicate column that will be interested as rsid Height$SNPID<-NULL #output new file write.table(Height, file = "Yengo_Height_EUR.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) ################################ ###########Pan UKB: BMI######## ################################ #downloaded the file form terminal using wget #wget https://pan-ukb-us-east-1.s3.amazonaws.com/sumstats_flat_files/continuous-21001-both_sexes-irnt.tsv.bgz #unzip the file BMI <- gunzip("continuous-21001-both_sexes-irnt.tsv.bgz", "BMI_PanUKB.tsv") #read in the tsv file BMI <- read_tsv("BMI_PanUKB.tsv") #convert to dataframe BMI<-data.frame(BMI) #convert allele frequency to MAF BMI$MAF<-ifelse(BMI$af_EUR > 0.5, 1-BMI$af_EUR, BMI$af_EUR) #subset fo MAF > 1% BMI<-subset(BMI, BMI$MAF > .01) #subset to only european data attach(BMI) BMI<-data.frame(chr,pos,ref,alt,beta_EUR,se_EUR,pval_EUR,MAF) #unzip and read in the rsID file from panUKB rsid<-gunzip("full_variant_qc_metrics.txt.bgz", "panuKBrsID.txt") RSid<-fread("panuKBrsID.txt",data.table=FALSE) #save just the chromosome, base pair position, and rsID for merging with BMI attach(RSid) RSid<-data.frame(chrom,pos,rsid) #rename column to match naming in BMI and merge on chromosome and base pair colnames(RSid)[1]<-"chr" BMI<-inner_join(RSid,BMI,by=c("chr","pos")) #now we have a file in the format that standard LDSC expects head(BMI) #except that the _EUR flag will not be recognized, so let's also rename those columns colnames(BMI)[6:8]<-c("beta","SE","pval") #ready to go right? not so fast: pvalue column is not in format that is expected #they report ln(p) #can convert back to regular pvalue by either calculating p on the Z-stat #or backing out from the natural log, either works just as well #BMI$pval<-2*pnorm(abs(BMI$beta)/BMI$SE,lower.tail=FALSE) BMI$pval<-exp(1)^BMI$pval #now let's output this file to be munged write.table(BMI, file = "panUKB_BMI_EUR.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) #now for the practical just going to subset to chromosome 1 #NOTE THAT THIS PART YOU WOULD NOT DO IN PRACTICE Height<-fread("Yengo_Height_EUR.txt",data.table=FALSE) Height<-subset(Height, Height$CHR == 1) write.table(Height, file = "Yengo_Height_EUR_chr1.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) BMI<-fread("panUKB_BMI_EUR.txt",data.table=FALSE) BMI<-subset(BMI, BMI$chr == 1) write.table(BMI, file = "panUKB_BMI_EUR_chr1.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) ######################### ######PART 2############## ##East Asian Example### ########################## ################################ ###########biobank Japan: BMI### ################################ #downloaded file from: https://pheweb.jp/ BMI_Japan<-fread("GWASsummary_BMI_Japanese_SakaueKanai2020.auto.txt.gz",data.table=FALSE) #convert to MAF BMI_Japan$MAF<-ifelse(BMI_Japan$A1FREQ > 0.5, 1-BMI_Japan$A1FREQ, BMI_Japan$A1FREQ) #subset to MAF > 1% BMI_Japan<-subset(BMI_Japan, BMI_Japan$MAF > 0.01) attach(BMI_Japan) BMI_Japan<-data.frame(SNP,CHR,BP,ALLELE1,ALLELE0,INFO,MAF,BETA,SE,P_BOLT_LMM_INF) colnames(BMI_Japan)[10]<-"PVAL" #output new file write.table(BMI_Japan, file = "BMI_Japan.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) rm(BMI_Japan) ################################ ###########Yengo 2022: Height######## ################################ #downloaded file from: https://www.joelhirschhornlab.org/giant-consortium-results Height<-fread("GIANT_HEIGHT_YENGO_2022_GWAS_SUMMARY_STATS_EAS.gz",data.table=FALSE) #convert to MAF Height$MAF<-ifelse(Height$EFFECT_ALLELE_FREQ > 0.5, 1-Height$EFFECT_ALLELE_FREQ, Height$EFFECT_ALLELE_FREQ) #subset to MAF > 1% Height<-subset(Height, Height$MAF > 0.01) #make SNPID null due to duplicate column that will be interested as rsid Height$SNPID<-NULL #output new file write.table(Height, file = "Yengo_Height_EAS.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) rm(Height) #now for the practical just going to subset to chromosome 1 #NOTE THAT THIS PART YOU WOULD NOT DO IN PRACTICE Height<-fread("Yengo_Height_EAS.txt",data.table=FALSE) Height<-subset(Height, Height$CHR == 1) write.table(Height, file = "Yengo_Height_EAS_chr1.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) BMI<-fread("BMI_Japan.txt",data.table=FALSE) head(BMI) BMI<-subset(BMI, BMI$CHR == 1) write.table(BMI, file = "BMI_EAS_chr1.txt", col.names=TRUE,quote = FALSE, row.names = FALSE) #prep the psychiatric traits for practical setwd("~/Desktop/Workshops/ISGWorkshop_2023/Practical/EUR/") BIP<-fread("pgc-bip2021-all.vcf.tsv",data.table=FALSE) head(BIP) colnames(BIP)[1]<-"CHR" BIP<-subset(BIP, BIP$CHR == 1) write.table(BIP, file = "BIP_chr1.txt") SCZ<-fread("PGC3_SCZ_wave3.european.autosome.public.v3.vcf.tsv",data.table=FALSE) SCZ$NEFF<-SCZ$NEFF*2 SCZ<-subset(SCZ, SCZ$CHROM == 1) write.table(SCZ, file = "SCZ_chr1.txt")