#################################################################################################################
##### EasyQC-script to perform file-level QC on imputed GWA data
##### EasyQC version: 8.5
##### Programmer: Thomas Winkler, 2014-04-04
##### Contact: thomas.winkler@klinik.uni-regensburg.de
#####
##### Pipeline: sanity checks -> MAF/INFO thresholds -> allele harmonization -> cptid creation
#####           -> duplicate removal -> allele-frequency check against a reference -> QQ and P-Z plots
#####           -> write the cleaned file.
##### Layout:   every command starts on its own line; comments are kept on separate lines so that
#####           a '#' never hides a command that follows it.
#################################################################################################################

## Global settings. The three column lists are positional and must stay the same length (10 entries each):
## --acolIn (names in the input file), --acolNewName (names used in the rest of this script)
## and --acolInClasses (class of each column).
DEFINE	--pathOut /home/meike/2017/EasyQC/OUTPUT
	--acolIn SNP;CHR;BP;A1;A2;FRQ;INFO;BETA;SE;P
	--acolNewName SNPID;CHR;POS;EFFECT_ALLELE;OTHER_ALLELE;EAF;INFO;BETA;SE;PVAL
	--acolInClasses character;integer;numeric;character;character;numeric;numeric;numeric;numeric;numeric
	--strMissing NA
	--strSeparator TAB

### Please define here the input files of the study:
## NOTE(review): --astrSetNumCol presumably adds each listed column with one constant value for all rows
## (N=8060 for every SNP, INFO_TYPE=1, ...) - confirm against the EasyQC manual.
EASYIN	--fileIn /home/meike/2017/EasyQC/test_GWAS
	--fileInShortName PRACTICAL
	--astrSetNumCol INFO_TYPE=1;IMPUTED=1;CALLRATE=NA;HWE_PVAL=NA;STRAND=NA;N=8060

#################################################################################################################
## EASYQC Scripting interface:
START EASYQC

####################
## 1. Sanity checks:
## Each CLEAN removes the rows matching --rcdClean; the count is reported under --strCleanName.

## Keep autosomes only (rows with a missing chromosome are not dropped here).
CLEAN --rcdClean !CHR%in%c(1:22,NA) --strCleanName numDropSNP_ChrXY --blnWriteCleaned 1
CLEAN --rcdClean is.na(EFFECT_ALLELE) & is.na(OTHER_ALLELE) --strCleanName numDrop_Missing_both_alleles --blnWriteCleaned 1
CLEAN --rcdClean is.na(PVAL) --strCleanName numDrop_Missing_P --blnWriteCleaned 1
CLEAN --rcdClean is.na(BETA) --strCleanName numDrop_Missing_BETA --blnWriteCleaned 1
CLEAN --rcdClean is.na(SE) --strCleanName numDrop_Missing_SE --blnWriteCleaned 1
CLEAN --rcdClean is.na(EAF) --strCleanName numDrop_Missing_EAF --blnWriteCleaned 1
CLEAN --rcdClean is.na(N) --strCleanName numDrop_Missing_N --blnWriteCleaned 1
CLEAN --rcdClean PVAL<0|PVAL>1 --strCleanName numDrop_invalid_PVAL --blnWriteCleaned 1
CLEAN --rcdClean SE<=0 --strCleanName numDrop_invalid_SE --blnWriteCleaned 1
CLEAN --rcdClean (EAF<0)|(EAF>1) --strCleanName numDrop_invalid_EAF --blnWriteCleaned 1

####################
## 2. Prepare files for filtering and apply minimum thresholds:

## MAF: drop monomorphic SNPs, then apply a minor-allele-frequency cut-off that depends on sample size
## (10% for N<1000, 5% for 1000<=N<2000, 3% for N>=2000).
CLEAN --rcdClean (EAF==0)|(EAF==1) --strCleanName numDrop_Monomorph --blnWriteCleaned 1
CLEAN --rcdClean N<1000 & (EAF<0.1|EAF>0.9) --strCleanName numDrop_MAF_0.10 --blnWriteCleaned 1
CLEAN --rcdClean N>=1000 & N<2000 & (EAF<0.05|EAF>0.95) --strCleanName numDrop_MAF_0.05 --blnWriteCleaned 1
CLEAN --rcdClean N>=2000 & (EAF<0.03|EAF>0.97) --strCleanName numDrop_MAF_0.03 --blnWriteCleaned 1

## INFO: imputation-quality cut-off per INFO_TYPE code (1: <0.4, 2: <0.5, 3: <0.8).
CLEAN --rcdClean INFO_TYPE==1&INFO<0.4 --strCleanName numDrop_Imputed_MACH_lowImpQual --blnWriteCleaned 1
CLEAN --rcdClean INFO_TYPE==2&INFO<0.5 --strCleanName numDrop_Imputed_IMPUTE_lowImpQual --blnWriteCleaned 1
CLEAN --rcdClean INFO_TYPE==3&INFO<0.8 --strCleanName numDrop_Imputed_PLINK_lowImpQual --blnWriteCleaned 1

####################
## 3. Harmonization of allele coding (I/D)
## The aim of this step is to compile uniform allele codes A/C/G/T or I/D from the different
## allele notations found in the input.
HARMONIZEALLELES	--colInA1 EFFECT_ALLELE
	--colInA2 OTHER_ALLELE

## Remove INDELs.
CLEAN --rcdClean (EFFECT_ALLELE%in%c('I','D')) | (OTHER_ALLELE%in%c('I','D')) --strCleanName numDrop_INDEL --blnWriteCleaned 1

####################
## 4. Harmonization of marker names (compile 'cptid')
## Uses the mapping file (columns rsmid, chr, pos) together with the input SNPID/CHR/POS and alleles.
CREATECPTID	--fileMap /home/meike/2017/EasyQC/rsTEST
	--colMapMarker rsmid
	--colMapChr chr
	--colMapPos pos
	--colInMarker SNPID
	--colInA1 EFFECT_ALLELE
	--colInA2 OTHER_ALLELE
	--colInChr CHR
	--colInPos POS

####################
## 5. Throw out duplicates based on cptid (b37).
CLEANDUPLICATES	--colInMarker cptid
	--strMode removeall

####################
## 6. AF Checks
## Merge the reference allele frequencies onto the data by cptid; merged reference columns get the
## suffix '.ref' (a0.ref, a1.ref, eaf.ref are used below).
MERGE	--colInMarker cptid
	--fileRef /home/meike/2017/EasyQC/AFtest
	--acolIn cptid;a0;a1;eaf
	--acolInClasses character;character;character;numeric
	--strRefSuffix .ref
	--colRefMarker cptid
	--blnWriteNotInRef 1
	--blnInAll 0

## Align alleles with reference file.
## All mismatches will be removed (e.g. A/T in input, A/C in reference).
## NOTE(review): this aligns EFFECT_ALLELE to a0.ref, so eaf.ref must be the frequency of a0 for the
## AF check below to be meaningful - confirm against the header/contents of the reference file.
ADJUSTALLELES	--colInA1 EFFECT_ALLELE
	--colInA2 OTHER_ALLELE
	--colInFreq EAF
	--colInBeta BETA
	--colRefA1 a0.ref
	--colRefA2 a1.ref
	--blnMetalUseStrand 1
	--blnRemoveMismatch 1
	--blnRemoveInvalid 1
	--blnWriteMismatch 1
	--blnWriteInvalid 1

## Plot study frequencies (EAF) against the reference frequencies (eaf.ref).
## Setting --blnPlotAll 0 plots only outlying SNPs with |Freq-Freq.ref|>0.2, which takes far less
## computation time; it is set to 1 here, so all SNPs are plotted.
AFCHECK	--colInFreq EAF
	--colRefFreq eaf.ref
	--numLimOutlier 0.2
	--blnPlotAll 1

####################
## 7. QQ plot
QQPLOT	--acolQQPlot PVAL
	--numPvalOffset 0.05
	--strMode subplot

####################
## 8. Plot Z versus P
PZPLOT	--colBeta BETA
	--colSe SE
	--colPval PVAL

####################
## 9. Save cleaned file.
## Only the columns listed in --acolOut are kept; output is gzipped, prefixed 'CLEANED.' and
## uses '.' for missing values.
GETCOLS --acolOut cptid;SNPID;CHR;POS;EFFECT_ALLELE;OTHER_ALLELE;EAF;INFO;SE;PVAL;BETA;N

WRITE	--strPrefix CLEANED.
	--strMissing .
	--strMode gz

STOP EASYQC
#################################################################################################################