# Here is the qualtrics link for this session:
#   https://qimr.az1.qualtrics.com/jfe/form/SV_eA90nL27Ww43rIa
# Here are the commands in a txt file if that is easier to work with.

# Today we will be working in scratch as the files we're using are big.
mkdir -p /scratch/"${USER}"/{raw,mds,qc,saige}

# First we're going to copy the plink files we'll be using as input.
cp /faculty/sarah/2023/Imputation/* "/scratch/${USER}/saige/"
mv /scratch/"${USER}"/saige/myers19.* "/scratch/${USER}/raw/"

# Next we are going to start the singularity container
# (scratch is mounted inside the container as /data).
singularity shell --hostname localhost \
  --bind "/scratch/${USER}:/data" \
  /usr/local/lib/singularity/ImputationProtocol.sif

# ... and load the applications we'll be using.
setup-hadoop --n-cores 2
setup-imputationserver

# Next we're going to set up the files for imputation.
# We start by making some MDS components (which are conceptually similar
# to PCs). This is not technically part of an imputation protocol, but it
# is good to check the ancestry of your sample before imputing.
cd /data/mds
enigma-mds --bfile ../raw/myers19

# Then we're going to do one final QC step (chromosome 19 only).
cd /data/qc
enigma-qc --bfile /data/raw/myers19 -c 19 --study-name myers19

# Now we're going to start imputing.
imputationserver --study-name myers19 --population eur
# OH NO!!!
# We have strand flips - luckily we can easily fix this.
# NOTE(review): the transcript had "check flip" (two words), which would
# run a command named `check`; the hyphenated tool name matches the
# `.check-flip` bfile it produces — confirm against the container.
check-flip --bfile ../raw/myers19

# We need to remake the vcf files from the flip-corrected bfile.
cd /data/qc
enigma-qc --bfile /data/raw/myers19.check-flip -c 19 --study-name myers19

# Then try the imputation again.
imputationserver --study-name myers19 --population eur

########################################################################
# Once the imputation is finished we will exit the container and
# process the data.
exit
cd "/scratch/${USER}/output/myers19/local/"

# Unzipping the data (the archive is encrypted; its password is the
# literal string "password").
7za -ppassword x chr_19.zip
tabix -f -p vcf -C chr19.dose.vcf.gz

#########################################################################
# First we'll load SAIGE.
singularity shell --hostname localhost \
  --bind "/scratch/${USER}:/data" \
  /usr/local/lib/singularity/saige.sif
cd /data/saige

# Next we'll run step 1 of the SAIGE analysis: in this step we run the
# LMM and create a pre-processed R data file.
./SAIGE_step1AD.sh

# Next we'll run step 2 of the SAIGE analysis: in this step we run the
# association analyses.
./SAIGE_step2AD.sh
exit

#########################################################################
# Stretch Goal!
# Merge the GWAS results with the R2 from the imputation files and keep
# only those results with r2 >= 0.6 (i.e.
# good imputation quality).

# Merge the GWAS results with the imputed information file and filter by
# R2 >= 0.6 (the results have been filtered by MAF before).

# Extract columns of interest in GWAS results:
#   CHR POS SNPID Allele1 Allele2 AF_Allele2 N BETA SE Tstat p.value
cd "/scratch/${USER}/saige"
awk '{print $1,$2,$3,$4,$5,$7,$8,$9,$10,$11,$12}' AD.chr19.SAIGE.txt \
  > temp_AD.chr19.results.txt

# Extract columns of interest in the imputation information: SNP Rsq
zcat ../output/myers19/local/chr19.info.gz \
  | awk '{print $1,$7}' > temp_chr19.info.txt

# Merge the GWAS results with the R2 (keyed on SNP ID, field 3 of the
# results vs field 1 of the info file).
awk 'NR==FNR{a[$1]=$2; next} $3 in a{print $0,a[$3]}' \
  temp_chr19.info.txt temp_AD.chr19.results.txt > temp

# Filter by R2 >= 0.6 and put the header on.
echo 'CHR POS SNPID Allele1 Allele2 AF_Allele2 N BETA SE Tstat p.value Rsq' \
  > AD.chr19.results.QC.txt
awk '{if ($12>=0.6) print $0}' temp >> AD.chr19.results.QC.txt

#### Plot the results: Manhattan plot and regional plot.
awk '{if (NR>1) print $1,$2,$11}' AD.chr19.results.QC.txt \
  > MH_plot.AD.chr19.txt
# Take this file to R and plot -log10(p) by POS to make a
# chromosome-level Manhattan plot.

# Next we'll find the lowest p-value (general-numeric sort on p.value).
sort -k11 -g AD.chr19.results.QC.txt | head
# Lowest p-value at 19:45422946 , p = 9.3515193823689e-16.
# Keep a +/- 500kb window around it (prefixing "chr" to the SNP IDs).
awk '{if (NR==1) print $1,$2,$3,$11;
      else if ($2>=44922946 && $2<=45922946) print $1,$2,"chr"$3,$11}' \
  AD.chr19.results.QC.txt > region_ld.AD.chr19.txt
# Take this file to R and plot -log10(p) by POS to make a regional plot.