### EXERCISE 1. ASSOCIATION ANALYSIS FOR A BINARY TRAIT.

# Go to the case-control folder and check the files you have there. How many cases and controls do you have? Is there any missing data?

# Enter the case-control data folder.
cd casecontrol

# In PLINK's .fam format the sixth column is the phenotype code
# (1 = control, 2 = case, 0 or -9 = missing). Tally each distinct code.
awk '{ print $6 }' adclean.cc.fam \
  | sort \
  | uniq -c

# Run an association analysis for the case-control variable AD. Check the log file and the output.

# Basic case/control association test on the adclean.cc fileset.
# Output files (including the log) use the prefix 1_adclean.cc.
plink \
  --bfile adclean.cc \
  --assoc \
  --out 1_adclean.cc





### EXERCISE 2. LOGISTIC REGRESSION (BINARY TRAIT).

# 2.1. Run a logistic regression for the case-control variable AD including the principal components (to correct for ancestry) in file adpc.txt as covariates.

# Logistic regression on the adclean.cc fileset with the covariates read
# from adpc.txt. Output files use the prefix 2.1_adclean.cc.
plink \
  --bfile adclean.cc \
  --logistic \
  --covar adpc.txt \
  --out 2.1_adclean.cc

# 2.2. Run a logistic regression for the case-control variable AD including the principal components as covariates AND hiding the results of the covariates.



# 2.3. Plot the results from exercise 2.2. 

# Get columns that we need to plot Manhattan and QQ plots: CHR, BP, P-VALUE. Our script is prepared to read a file with  no header, and no missing data.

# Skip the header line, keep fields 1, 3 and 9 (CHR, BP, P) separated by single
# spaces, and drop every resulting row that contains the text "NA".
awk 'NR > 1 {
  row = $1 " " $3 " " $9
  if (row !~ /NA/) print row
}' 2.2_adclean.cc.assoc.logistic > plot.adclean.cc.logistic.txt

# Open in R the script Rscript_qqMan.R and plot the results. You can check first what your working directory is by typing pwd.

# Explore LD pattern by uploading the data of chromosome 19 in LDlink. Prepare a file containing CHR, BP, SNP, P-VALUE (with headers).

# Keep the header line plus the chromosome 19 rows whose p-value (field 9) is
# not NA, printed as CHR BP SNP P. The NA check is made on the p-value field
# itself rather than on the whole row, so a SNP whose identifier happens to
# contain the letters "NA" is not discarded by accident.
awk 'NR == 1 || ($1 == 19 && $9 != "NA") { print $1, $3, $2, $9 }' \
  2.2_adclean.cc.assoc.logistic > ld19.adclean.cc.logistic.txt

# To find the rsnumber of the SNP with the lowest p-value:

# Sort ascending on the p-value column (field 4 only) using general-numeric
# comparison, so values in scientific notation such as 3.2e-08 are ordered by
# magnitude and the smallest p-values come first. A plain or reversed text
# sort does not do this. With GNU sort the non-numeric header line sorts ahead
# of the numbers, so it stays at the top of the listing.
sort -g -k4,4 ld19.adclean.cc.logistic.txt | head





### EXERCISE 3. LINEAR REGRESSION (CONTINUOUS TRAIT)

# Go to the continuous folder and check the files you have there.

# Switch from the case-control folder to the continuous-trait folder.
cd ../continuous/

# 3.1. Run a linear regression for the continuous trait including the principal
# components as covariates, hiding the results of the covariates, and using the
# --pheno option. Output files use the prefix 3.1_adclean.cont.
plink \
  --bfile adclean.cont \
  --linear hide-covar \
  --pheno adclean.cont.txt \
  --covar adpc.txt \
  --out 3.1_adclean.cont


# 3.2. Plot the results from exercise 3.1. 

# Get columns that we need to plot Manhattan and QQ plots: CHR, BP, P-VALUE. Our script is prepared to read a file with no header, and no missing data.

# Skip the header line, keep fields 1, 3 and 9 (CHR, BP, P) separated by single
# spaces, and drop every resulting row that contains the text "NA".
awk 'NR > 1 {
  row = $1 " " $3 " " $9
  if (row !~ /NA/) print row
}' 3.1_adclean.cont.assoc.linear > plot.adclean.cont.linear.txt

# Open in R the script Rscript_qqMan.R and plot the results. You can check first what your working directory is by typing pwd.

# Explore LD pattern by uploading the data of chromosome 20 in LDlink. Prepare a file containing CHR, BP, SNP, P-VALUE (with headers).

# Keep the header line plus the chromosome 20 rows whose p-value (field 9) is
# not NA, printed as CHR BP SNP P. The NA check is made on the p-value field
# itself rather than on the whole row, so a SNP whose identifier happens to
# contain the letters "NA" is not discarded by accident.
awk 'NR == 1 || ($1 == 20 && $9 != "NA") { print $1, $3, $2, $9 }' \
  3.1_adclean.cont.assoc.linear > ld20.adclean.cont.linear.txt

# To find the rsnumber of the SNP with the lowest p-value:

# Read the chromosome 20 table written by the previous step (note the .txt
# suffix) and sort ascending on the p-value column (field 4 only) using
# general-numeric comparison, so values in scientific notation such as 3.2e-08
# are ordered by magnitude and the smallest p-values come first. With GNU sort
# the non-numeric header line sorts ahead of the numbers, so it stays on top.
sort -g -k4,4 ld20.adclean.cont.linear.txt | head


# 3.3. Run a linear regression for the continuous trait  including only PC1 as covariate, hiding the results of the covariate, and using the --pheno option.