# Here is the qualtrics link for this session:
#   https://qimr.az1.qualtrics.com/jfe/form/SV_eA90nL27Ww43rIa
# Here are the commands in a txt file if that is easier to work with.

# Today we will be working in scratch as the files we're using are big.
mkdir -p /scratch/"${USER}"/{raw,mds,qc,saige}

# First we're going to copy the plink files we'll be using as input.
cp /faculty/sarah/2023/Imputation/* "/scratch/${USER}/saige/"
mv /scratch/"${USER}"/saige/myers19.* "/scratch/${USER}/raw/"

# Next we are going to start the singularity container
# (scratch is mounted inside the container as /data).
singularity shell --hostname localhost \
  --bind "/scratch/${USER}:/data" \
  /usr/local/lib/singularity/ImputationProtocol.sif

# ... and load the applications we'll be using.
setup-hadoop --n-cores 2
setup-imputationserver

# Next we're going to set up the files for imputation.
# We start by making some MDS components (which are conceptually similar
# to PCs). This is not technically part of an imputation protocol, but it
# is good to check the ancestry of your sample before imputing.
cd /data/mds
enigma-mds --bfile ../raw/myers19

# Then we're going to do one final QC step (chromosome 19 only).
cd /data/qc
enigma-qc --bfile /data/raw/myers19 -c 19 --study-name myers19

# Now we're going to start imputing.
imputationserver --study-name myers19 --population eur
# OH NO!!!
# We have strand flips - luckily we can easily fix this.
# NOTE(review): the transcript had "check flip" (two words), which would
# run a command named `check`; the hyphenated tool name matches the
# `.check-flip` bfile it produces — confirm against the container.
check-flip --bfile ../raw/myers19

# We need to remake the vcf files from the flip-corrected bfile.
cd /data/qc
enigma-qc --bfile /data/raw/myers19.check-flip -c 19 --study-name myers19

# Then try the imputation again.
imputationserver --study-name myers19 --population eur

########################################################################
# Once the imputation is finished we will exit the container and
# process the data.
exit
cd "/scratch/${USER}/output/myers19/local/"

# Unzipping the data (the archive is encrypted; its password is the
# literal string "password").
7za -ppassword x chr_19.zip
tabix -f -p vcf -C chr19.dose.vcf.gz

#########################################################################
# First we'll load SAIGE.
singularity shell --hostname localhost \
  --bind "/scratch/${USER}:/data" \
  /usr/local/lib/singularity/saige.sif
cd /data/saige

# Next we'll run step 1 of the SAIGE analysis: in this step we run the
# LMM and create a pre-processed R data file.
./SAIGE_step1AD.sh

# Next we'll run step 2 of the SAIGE analysis: in this step we run the
# association analyses.
./SAIGE_step2AD.sh
exit

#########################################################################
# Stretch Goal!
# Merge the GWAS results with the R2 from the imputation files and keep
# only those results with r2 >= 0.6 (i.e.
# good imputation quality).

# Merge the GWAS results with the imputed information file and filter by
# R2 >= 0.6 (the results have been filtered by MAF before).

# Extract columns of interest in GWAS results:
#   CHR POS SNPID Allele1 Allele2 AF_Allele2 N BETA SE Tstat p.value
cd "/scratch/${USER}/saige"
awk '{print $1,$2,$3,$4,$5,$7,$8,$9,$10,$11,$12}' AD.chr19.SAIGE.txt \
  > temp_AD.chr19.results.txt

# Extract columns of interest in the imputation information: SNP Rsq
zcat ../output/myers19/local/chr19.info.gz \
  | awk '{print $1,$7}' > temp_chr19.info.txt

# Merge the GWAS results with the R2 (keyed on SNP ID, field 3 of the
# results vs field 1 of the info file).
awk 'NR==FNR{a[$1]=$2; next} $3 in a{print $0,a[$3]}' \
  temp_chr19.info.txt temp_AD.chr19.results.txt > temp

# Filter by R2 >= 0.6 and put the header on.
echo 'CHR POS SNPID Allele1 Allele2 AF_Allele2 N BETA SE Tstat p.value Rsq' \
  > AD.chr19.results.QC.txt
awk '{if ($12>=0.6) print $0}' temp >> AD.chr19.results.QC.txt

#### Plot the results: Manhattan plot and regional plot.
awk '{if (NR>1) print $1,$2,$11}' AD.chr19.results.QC.txt \
  > MH_plot.AD.chr19.txt
# Take this file to R and plot -log10(p) by POS to make a
# chromosome-level Manhattan plot.

# Next we'll find the lowest p-value (general-numeric sort on p.value).
sort -k11 -g AD.chr19.results.QC.txt | head
# Lowest p-value at 19:45422946 , p = 9.3515193823689e-16.
# Keep a +/- 500kb window around it (prefixing "chr" to the SNP IDs).
awk '{if (NR==1) print $1,$2,$3,$11;
      else if ($2>=44922946 && $2<=45922946) print $1,$2,"chr"$3,$11}' \
  AD.chr19.results.QC.txt > region_ld.AD.chr19.txt
# Take this file to R and plot -log10(p) by POS to make a regional plot.