### EXERCISE 1. ASSOCIATION ANALYSIS FOR A BINARY TRAIT.

# Go to the case-control folder and check the files you have there.
# How many cases and controls do you have? Is there any missing data?
cd casecontrol
# Column 6 of a PLINK .fam file is the phenotype; tally each distinct value
# (PLINK convention: 1 = control, 2 = case, 0 or -9 = missing).
awk '{print $6}' adclean.cc.fam | sort | uniq -c

# Run an association analysis for the case-control variable AD.
# Check the log file and the output.
plink --bfile adclean.cc --assoc --out 1_adclean.cc

### EXERCISE 2. LOGISTIC REGRESSION (BINARY TRAIT).

# 2.1. Run a logistic regression for the case-control variable AD including
# the principal components (to correct for ancestry) in file adpc.txt as
# covariates.
plink --bfile adclean.cc --logistic --covar adpc.txt --out 2.1_adclean.cc

# 2.2. Run a logistic regression for the case-control variable AD including
# the principal components as covariates AND hiding the results of the
# covariates.
# The steps in 2.3 read 2.2_adclean.cc.assoc.logistic, so this run must exist.
plink --bfile adclean.cc --logistic hide-covar --covar adpc.txt \
  --out 2.2_adclean.cc

# 2.3. Plot the results from exercise 2.2.
# Get columns that we need to plot Manhattan and QQ plots: CHR, BP, P-VALUE.
# Our script is prepared to read a file with no header, and no missing data.
# The NA filter is applied to the P column only, so a row is never dropped
# just because some other field happens to contain the letters "NA".
awk 'NR > 1 && $9 != "NA" {print $1, $3, $9}' 2.2_adclean.cc.assoc.logistic \
  > plot.adclean.cc.logistic.txt

# Open in R the script Rscript_qqMan.R and plot the results. You can check
# first what your working directory is by typing pwd.

# Explore LD pattern by uploading the data of chromosome 19 in LDlink.
# Prepare a file containing CHR, BP, SNP, P-VALUE (with headers).
awk '(NR == 1 || $1 == 19) && $9 != "NA" {print $1, $3, $2, $9}' \
  2.2_adclean.cc.assoc.logistic > ld19.adclean.cc.logistic.txt

# To find the rsnumber of the SNP with the lowest p-value:
# -g compares values such as 1.2e-08 numerically (a plain or reversed text
# sort does not); ascending order puts the smallest p-values right after the
# non-numeric header line.
sort -g -k4,4 ld19.adclean.cc.logistic.txt | head

### EXERCISE 3. LINEAR REGRESSION (CONTINUOUS TRAIT)

# Go to the continuous folder and check the files you have there.
cd ../continuous/

# 3.1. Run a linear regression for the continuous trait including the
# principal components as covariates, hiding the results of the covariates,
# and using the --pheno option.
# Linear regression on the phenotype supplied via --pheno, with the principal
# components in adpc.txt as covariates; hide-covar omits the covariate rows
# from the results file.
plink --bfile adclean.cont --linear hide-covar --pheno adclean.cont.txt \
  --covar adpc.txt --out 3.1_adclean.cont

# 3.2. Plot the results from exercise 3.1.
# Get columns that we need to plot Manhattan and QQ plots: CHR, BP, P-VALUE.
# Our script is prepared to read a file with no header, and no missing data.
# The NA filter is applied to the P column only, so a row is never dropped
# just because some other field happens to contain the letters "NA".
awk 'NR > 1 && $9 != "NA" {print $1, $3, $9}' 3.1_adclean.cont.assoc.linear \
  > plot.adclean.cont.linear.txt

# Open in R the script Rscript_qqMan.R and plot the results. You can check
# first what your working directory is by typing pwd.

# Explore LD pattern by uploading the data of chromosome 20 in LDlink.
# Prepare a file containing CHR, BP, SNP, P-VALUE (with headers).
awk '(NR == 1 || $1 == 20) && $9 != "NA" {print $1, $3, $2, $9}' \
  3.1_adclean.cont.assoc.linear > ld20.adclean.cont.linear.txt

# To find the rsnumber of the SNP with the lowest p-value:
# Read the .txt file written just above. -g compares values such as 1.2e-08
# numerically; ascending order puts the smallest p-values right after the
# non-numeric header line.
sort -g -k4,4 ld20.adclean.cont.linear.txt | head

# 3.3. Run a linear regression for the continuous trait including only PC1 as
# covariate, hiding the results of the covariate, and using the --pheno option.