Answer sheet for the tutorial

Setting up environment


mkdir -p day7
cd day7
cp -r /faculty/sarah/2022/Day7/* .

PRS calculation using PRSice


# PRSice code - run this in the ssh terminal window
Rscript /usr/local/bin/PRSice.R --dir . --prsice /usr/local/bin/PRSice  --base weights.prs --target ozbmi  --thread 1  --snp SNP --chr CHR --bp BP --A1 A1 --A2 A2 --stat BETA --pvalue P  --beta --pheno ozht.pheno --score sum --all-score --bar-levels 0.00000005,0.000001,0.0001,0.01,1  --fastscore --quantile 5 --quant-ref 1 --binary-target F --no-regress

Open tables and merge

pheno <- read.table("ozht.pheno", header = T)
covar <- read.table("ozht.cov", header = T)
PRS <- read.table("PRSice.all_score", header = T)


head(PRS)

names(PRS) <- c("FID", "IID", "PRS1", "PRS2", "PRS3", "PRS4",
    "PRS5")
head(PRS)

pheno$ht <- pheno$ht * 100

# Best to use names instead of column number
temp = merge(pheno, covar[, 2:8], by = "IID")
longData = merge(temp, PRS[, 2:7], by = "IID")

# Run models
nullModel <- lm(ht ~ age + sex + PC1 + PC2 + PC3 + PC4, data = longData)
summary(nullModel)

## 
## Call:
## lm(formula = ht ~ age + sex + PC1 + PC2 + PC3 + PC4, data = longData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.7268  -6.0023  -0.2827   5.3050  30.4879 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 173.50935    6.65163  26.085  < 2e-16 ***
## age          -0.10116    0.01697  -5.961 2.99e-09 ***
## sex          -9.80000    0.43327 -22.619  < 2e-16 ***
## PC1         744.83641  373.94861   1.992   0.0465 *  
## PC2         240.08216  230.12319   1.043   0.2970    
## PC3         -97.14299  151.21233  -0.642   0.5207    
## PC4          54.75009  145.82996   0.375   0.7074    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.412 on 1887 degrees of freedom
##   (38 observations deleted due to missingness)
## Multiple R-squared:  0.2296, Adjusted R-squared:  0.2271 
## F-statistic: 93.71 on 6 and 1887 DF,  p-value: < 2.2e-16

# Then we are going to run a model that includes the
# covariates and the first PRS
thresh1 <- lm(ht ~ age + sex + PC1 + PC2 + PC3 + PC4 + PRS1,
    data = longData)
summary(thresh1)

## 
## Call:
## lm(formula = ht ~ age + sex + PC1 + PC2 + PC3 + PC4 + PRS1, data = longData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.6273  -5.9917  -0.2311   5.5705  29.9557 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 175.28775    6.60888  26.523  < 2e-16 ***
## age          -0.10084    0.01684  -5.988 2.54e-09 ***
## sex          -9.76070    0.43002 -22.698  < 2e-16 ***
## PC1         667.06438  371.36819   1.796   0.0726 .  
## PC2         266.81843  228.42069   1.168   0.2429    
## PC3         -96.05118  150.05960  -0.640   0.5222    
## PC4          35.77279  144.75946   0.247   0.8048    
## PRS1          3.11956    0.56855   5.487 4.64e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.348 on 1886 degrees of freedom
##   (38 observations deleted due to missingness)
## Multiple R-squared:  0.2417, Adjusted R-squared:  0.2389 
## F-statistic: 85.86 on 7 and 1886 DF,  p-value: < 2.2e-16

# We can compute the difference in R2 between the 2 models
# using the following code
summary(thresh1)$r.squared - summary(nullModel)$r.squared

## [1] 0.01210506

Format data - to one row per family (long -> wide)

zyg <- read.table("ozht.zyg", header = T)

# We will then merge on the zyg data using the individual
# ID 'IID' as the key.
longData1 = merge(longData, zyg[, 2:5], by = "IID")

# Take a look at the result to check that it worked
head(longData1)

# For openMx we need our data to be in a wide format with
# one line per family so we will need to reshape the data.
# In this code the variables listed in v.names are present
# for both twins The variable Twin specifies if the person
# is twin 1 or 2
wideData = reshape(data = longData1, direction = "wide", v.names = c("IID",
    "ht", "age", "sex", "PC1", "PC2", "PC3", "PC4", "PRS1", "PRS2",
    "PRS3", "PRS4", "PRS5"), idvar = "FID", timevar = "Twin",
    sep = "_")

# We will replace all missingness in the definition
# variables with 0 the people with missing definition
# variables don't have phenotypic data so this will not
# bais estimates
wideData[, c(6:12, 19:25)][is.na(wideData[, c(6:12, 19:25)])] <- 0

OpenMx model

library(OpenMx)
source("miFunctions5272022.R")
# Select Variables for Analysis
vars <- "ht"  # list of variables names
nv <- 1  # number of variables
ntv <- nv * 2  # number of total variables
depVar <- c("ht_1", "ht_2")  # dependent variables
indVar <- c("age_1", "sex_1", "PC1_1", "PC2_1", "PC3_1", "PC4_1",
    "PRS1_1", "age_2", "sex_2", "PC1_2", "PC2_2", "PC3_2", "PC4_2",
    "PRS1_2")  # independent variables or predictors
selVars <- c(depVar, indVar)

# Select Data for Analysis
mzData <- subset(wideData, MZDZ == 1, selVars)
dzData <- subset(wideData, MZDZ == 2, selVars)

# Set Starting Values
svMe <- 170  # start value for means
svVa <- 100  # start value for variance
lbVa <- 1e-05  # lower bound for variance
svBe <- 1e-04  # start value for means

# Create Data Objects for Multiple Groups
dataMZ <- mxData(observed = mzData, type = "raw")
dataDZ <- mxData(observed = dzData, type = "raw")


# Objects to hold definition variables for the regression
dPC1 <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.PC1_1", "data.PC1_2"), name = "dPC1")
dPC2 <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.PC2_1", "data.PC2_2"), name = "dPC2")
dPC3 <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.PC3_1", "data.PC3_2"), name = "dPC3")
dPC4 <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.PC4_1", "data.PC4_2"), name = "dPC4")
dsex <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.sex_1", "data.sex_2"), name = "dsex")
dage <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.age_1", "data.age_2"), name = "dage")
dPRS <- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
    labels = c("data.PRS1_1", "data.PRS1_2"), name = "dPRS")

# Objects to hold betas for the covariates
bPC1 <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b1", name = "bPC1")
bPC2 <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b2", name = "bPC2")
bPC3 <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b3", name = "bPC3")
bPC4 <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b4", name = "bPC4")
bsex <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b5", name = "bsex")
bage <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b6", name = "bage")
bPRS <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svBe, label = "b7", name = "bPRS")

# Then we will build the model for the means that includes
# the regression tems (we are using Kronecker products here
# to mutliply the values of the independent variables by
# the betas)
meanG <- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
    values = svMe, labels = labVars("mean", vars), name = "meanG")
ExpMean <- mxAlgebra(expression = (meanG + bPC1 %x% dPC1 + bPC2 %x%
    dPC2 + bPC3 %x% dPC3 + bPC4 %x% dPC4 + bsex %x% dsex + bage %x%
    dage + bPRS %x% dPRS), name = "expMean")

# and do some house keeping (making a list of the objects
# so we can pull these into the model later)
pars <- c(bPC1, bPC2, bPC3, bPC4, bsex, bage, bPRS, meanG)
defs <- c(dPC1, dPC2, dPC3, dPC4, dsex, dage, dPRS)


# Create Algebra for expected Variance/Covariance Matrices
covMZ <- mxMatrix(type = "Symm", nrow = ntv, ncol = ntv, free = TRUE,
    values = c(0.1, 0.05, 0.1), lbound = valDiag(lbVa, ntv),
    labels = c("var", "cMZ21", "var"), name = "covMZ")
covDZ <- mxMatrix(type = "Symm", nrow = ntv, ncol = ntv, free = TRUE,
    values = c(0.1, 0.05, 0.1), lbound = valDiag(lbVa, ntv),
    labels = c("var", "cDZ21", "var"), name = "covDZ")

# Next we build the expection and model objects Create
# Expectation Objects for Multiple Groups
expMZ <- mxExpectationNormal(covariance = "covMZ", means = "expMean",
    dimnames = depVar)
expDZ <- mxExpectationNormal(covariance = "covDZ", means = "expMean",
    dimnames = depVar)
funML <- mxFitFunctionML()

# Create Model Objects for Multiple Groups
modelMZ <- mxModel(pars, defs, ExpMean, covMZ, dataMZ, expMZ,
    funML, name = "MZ")
modelDZ <- mxModel(pars, defs, ExpMean, covDZ, dataDZ, expDZ,
    funML, name = "DZ")
multi <- mxFitFunctionMultigroup(c("MZ", "DZ"))

# Build Saturated Model
model_wPRS <- mxModel("wPRS", pars, modelMZ, modelDZ, multi)

# Run model
fit_wPRS <- mxRun(model_wPRS)
(sum_wPRS <- summary(fit_wPRS))

## Summary of wPRS 
##  
## free parameters:
##      name   matrix  row  col     Estimate     Std.Error A lbound ubound
## 1      b1     bPC1    1    1 494.07780228 460.780896671                
## 2      b2     bPC2    1    1 376.45026775 275.668856990                
## 3      b3     bPC3    1    1 -52.34236364 170.976227797                
## 4      b4     bPC4    1    1  52.11647033 164.470117427                
## 5      b5     bsex    1    1  -8.91504139   0.557884837                
## 6      b6     bage    1    1  -0.11091751   0.021763799                
## 7      b7     bPRS    1    1   2.58520600   0.690176704                
## 8  meanht    meanG    1    1 173.54694174   8.320062467                
## 9     var MZ.covMZ ht_1 ht_1  74.34378646   2.844644713    1e-05       
## 10  cMZ21 MZ.covMZ ht_1 ht_2  69.45798062   2.857977923        0       
## 11  cDZ21 DZ.covDZ ht_1 ht_2  16.93968677   3.188544188        0       
## 
## Model Statistics: 
##                |  Parameters  |  Degrees of Freedom  |  Fit (-2lnL units)
##        Model:             11                   1883             12435.322
##    Saturated:             NA                     NA                    NA
## Independence:             NA                     NA                    NA
## Number of observations/statistics: 969/1894
## 
## Information Criteria: 
##       |  df Penalty  |  Parameters Penalty  |  Sample-Size Adjusted
## AIC:     8669.32237              12457.322                12457.598
## BIC:     -512.68389              12510.961                12476.025
## CFI: NA 
## TLI: 1   (also known as NNFI) 
## RMSEA:  0  [95% CI (NA, NA)]
## Prob(RMSEA <= 0.05): NA
## To get additional fit indices, see help(mxRefModels)
## timestamp: 2022-06-15 00:48:26 
## Wall clock time: 4.534725 secs 
## optimizer:  NPSOL 
## OpenMx version number: 2.19.5.1 
## Need help?  See help(mxSummary)

# And then run a submodel whic drops the PRS effect from
# the model Drop the PRS from the model (running the null
# model)
model_Null <- mxModel(fit_wPRS, name = "noPRS")
model_Null <- omxSetParameters(model_Null, labels = "b7", free = FALSE,
    values = 0)
fit_Null <- mxRun(model_Null)
(sum_Null <- summary(fit_Null))

## Summary of noPRS 
##  
## The final iterate satisfies the optimality conditions to the accuracy requested, but the sequence of iterates has not yet converged. Optimizer was terminated because no further improvement could be made in the merit function (Mx status GREEN). 
##  
## free parameters:
##      name   matrix  row  col    Estimate     Std.Error A lbound ubound
## 1      b1     bPC1    1    1 596.9028672 444.656189378                
## 2      b2     bPC2    1    1 341.5478088 279.361150612                
## 3      b3     bPC3    1    1 -60.4220028 141.517064862                
## 4      b4     bPC4    1    1  83.9433795 191.642823752                
## 5      b5     bsex    1    1  -8.9563185   0.561653963                
## 6      b6     bage    1    1  -0.1107354   0.021912748                
## 7  meanht    meanG    1    1 172.2274408   8.538805730                
## 8     var MZ.covMZ ht_1 ht_1  75.2064382   2.879449459    1e-05       
## 9   cMZ21 MZ.covMZ ht_1 ht_2  70.3274383   2.892338544        0       
## 10  cDZ21 DZ.covDZ ht_1 ht_2  17.6870557   3.240011731 !      0       
## 
## Model Statistics: 
##                |  Parameters  |  Degrees of Freedom  |  Fit (-2lnL units)
##        Model:             10                   1884             12449.195
##    Saturated:             NA                     NA                    NA
## Independence:             NA                     NA                    NA
## Number of observations/statistics: 969/1894
## 
## Information Criteria: 
##       |  df Penalty  |  Parameters Penalty  |  Sample-Size Adjusted
## AIC:      8681.1947              12469.195                12469.424
## BIC:      -505.6878              12517.957                12486.197
## CFI: NA 
## TLI: 1   (also known as NNFI) 
## RMSEA:  0  [95% CI (NA, NA)]
## Prob(RMSEA <= 0.05): NA
## To get additional fit indices, see help(mxRefModels)
## timestamp: 2022-06-15 00:48:29 
## Wall clock time: 2.568562 secs 
## optimizer:  NPSOL 
## OpenMx version number: 2.19.5.1 
## Need help?  See help(mxSummary)

# We can work out the significance of the PRS effect by
# comparing the fit of the models
mxCompare(fit_wPRS, fit_Null)

##   base comparison ep  minus2LL   df       AIC    diffLL diffdf             p
## 1 wPRS       <NA> 11 12435.322 1883 12457.322        NA     NA            NA
## 2 wPRS      noPRS 10 12449.195 1884 12469.195 13.872356      1 0.00019565525

# and calculate r2 as the difference in variance between
# the models gives variance explained by PRS, divided by
# total variance
(fit_Null$MZ$covMZ$values[1, 2] - fit_wPRS$MZ$covMZ$values[1,
    2])/fit_Null$MZ$covMZ$values[1, 2]

## [1] 0.012362994

Calculate GRM using GCTA


./gcta64 --bfile ozbmi --make-grm --out ozGRM

## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:29 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Options: 
##  
## --bfile ozbmi 
## --make-grm 
## --out ozGRM 
## 
## Note: GRM is computed using the SNPs on the autosomes.
## Reading PLINK FAM file from [ozbmi.fam]...
## 1956 individuals to be included from FAM file.
## 1956 individuals to be included. 651 males, 1305 females, 0 unknown.
## Reading PLINK BIM file from [ozbmi.bim]...
## 9758 SNPs to be included from BIM file(s).
## Computing the genetic relationship matrix (GRM) v2 ...
## Subset 1/1, no. subject 1-1956
##   1956 samples, 9758 markers, 1913946 GRM elements
## IDs for the GRM file have been saved in the file [ozGRM.grm.id]
## Computing GRM...
##   100% finished in 2.1 sec
## 9758 SNPs have been processed.
##   Used 9718 valid SNPs.
## The GRM computation is completed.
## Saving GRM...
## GRM has been saved in the file [ozGRM.grm.bin]
## Number of SNPs in each pair of individuals has been saved in the file [ozGRM.grm.N.bin]
## 
## Analysis finished at 00:48:31 CEST on Wed Jun 15 2022
## Overall computational time: 2.22 sec.

Open GRM and draw some plots

# Open GRM and visualise off-diagonal distribution
ReadGRMBin = function(prefix, AllN = F, size = 4) {
    sum_i = function(i) {
        return(sum(1:i))
    }
    BinFileName = paste(prefix, ".grm.bin", sep = "")
    NFileName = paste(prefix, ".grm.N.bin", sep = "")
    IDFileName = paste(prefix, ".grm.id", sep = "")
    id = read.table(IDFileName)
    n = dim(id)[1]
    BinFile = file(BinFileName, "rb")
    grm = readBin(BinFile, n = n * (n + 1)/2, what = numeric(0),
        size = size)
    NFile = file(NFileName, "rb")
    if (AllN == T) {
        N = readBin(NFile, n = n * (n + 1)/2, what = numeric(0),
            size = size)
    } else N = readBin(NFile, n = 1, what = numeric(0), size = size)
    i = sapply(1:n, sum_i)
    return(list(diag = grm[i], off = grm[-i], id = id, N = N))
}

GRM = ReadGRMBin(prefix = "ozGRM")

hist(GRM$diag, breaks = 100)  # Large values can indicate data problems or outstanding homozygosity rate

#
hist(GRM$off, breaks = 100)

# This plot isn't very informative as most relationship
# coefficents are close to 0. We can update our plot to
# only show relationship coefficents greater than 0.05
# using the following code

hist(GRM$off[which(GRM$off > 0.05)], breaks = 100)

# This plot clearly shows we have a lot of siblings and MZ
# twin pairs. If we zoom in even further we might spot
# those cousins

hist(GRM$off[which(GRM$off > 0.05)], breaks = 100, xlim = c(0.05,
    0.3))

Write files for GCTA analysis

prs = read.table("PRSice.all_score", header = T)
covar <- read.table("ozht.cov", header = T)
pheno <- read.table("ozht.pheno", header = T)

# Then rescaling and renaming
pheno$ht <- pheno$ht * 100
names(prs) <- c("FID", "IID", "PRS1", "PRS2", "PRS3", "PRS4",
    "PRS5")

# Then merging the data into 1 dataframe
all = merge(covar, prs, by = c("IID", "FID"))
all = merge(all, pheno, by = c("IID", "FID"))

# Then we'll write out the covariate files for the
# continuous covariates that we want to include in the GCTA
# model It's important that the PRS is in the last column
# of this file
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
    "PC4", "PRS1")], "ozht.prs1.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
    "PC4", "PRS2")], "ozht.prs2.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
    "PC4", "PRS3")], "ozht.prs3.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
    "PC4", "PRS4")], "ozht.prs4.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
    "PC4", "PRS5")], "ozht.prs5.qcovar", quote = F, row.names = F)

# Then we'll write out the covariate files for the binary
# covariates that we want to include in the GCTA model
write.table(all[, c("FID", "IID", "sex")], "ozht.covar", quote = F,
    row.names = F)

# Then we'll write out the phenotype files for the GCTA
# model
write.table(all[, c("FID", "IID", "ht")], "ozht.phen", quote = F,
    row.names = F)

Run GCTA with fixed and random effect (AE model)


./gcta64 --pheno ozht.phen --qcovar ozht.prs1.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS1 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs2.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS2 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs3.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS3 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs4.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS4 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs5.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS5 --reml --reml-est-fix
 
#GCTA will make a .log file and an .hsq file for each model. 
#The beta and SE of the PRS effect can be found in the line labeled X_7 in the log files.
#We can pull these out of the log files and save them to a file with the following code
 
grep X_7 *.log > gcta_results.txt

## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:33 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs1.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS1
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs1.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs1.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.9528 34.2896
## logL: -4810.94
## Running AI-REML algorithm ...
## Iter.    logL    V(G)    V(e)    
## 1    -4751.85    59.84011    17.40093    
## 2    -4647.03    70.13324    10.91462    
## 3    -4590.77    76.39449    8.49173 
## 4    -4567.32    80.61353    7.36394 
## 5    -4557.13    83.63826    6.74011 
## 6    -4552.33    85.84919    6.35988 
## 7    -4549.96    87.47110    6.11437 
## 8    -4548.77    88.65891    5.94998 
## 9    -4548.16    89.52614    5.83726 
## 10   -4547.85    91.52381    5.58866 
## 11   -4547.51    91.78332    5.56727 
## 12   -4547.51    91.80849    5.56429 
## 13   -4547.51    91.81150    5.56392 
## Log-likelihood ratio converged.
## 
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 69.78139
## logL: -4980.18681
## Running AI-REML algorithm ...
## Iter.    logL    V(e)    
## 1    -4948.05    69.75216    
## 2    -4948.05    69.73219    
## 3    -4948.05    69.68899    
## 4    -4948.05    69.68902    
## 5    -4948.05    69.68902    
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G) 91.811500   3.895050
## V(e) 5.563916    0.357498
## Vp   97.375416   3.852776
## V(G)/Vp  0.942861    0.004449
## 
## Sampling variance/covariance of the estimates of variance components:
## 1.517142e+01 -2.276706e-01   
## -2.276706e-01    1.278046e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 169.095957  8.764314
## X_2  -0.113977   0.022456
## X_3  -17.551326  484.849288
## X_4  410.861460  292.698935
## X_5  51.660560   189.243241
## X_6  -41.123137  179.306181
## X_7  3.182480    0.808135
## X_8  -9.535405   0.566582
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS1.hsq].
## 
## Analysis finished at 00:48:44 CEST on Wed Jun 15 2022
## Overall computational time: 11.16 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:44 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs2.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS2
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs2.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs2.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values:  43.828 34.2361
## logL: -4807.36
## Running AI-REML algorithm ...
## Iter.    logL    V(G)    V(e)    
## 1    -4747.62    59.53572    17.48921    
## 2    -4644.39    69.76967    10.98529    
## 3    -4588.49    76.01234    8.53453 
## 4    -4564.96    80.21423    7.39286 
## 5    -4554.71    83.22495    6.76154 
## 6    -4549.88    85.42585    6.37683 
## 7    -4547.49    87.04118    6.12843 
## 8    -4546.29    88.22501    5.96209 
## 9    -4545.67    89.09008    5.84799 
## 10   -4545.36    91.08456    5.59628 
## 11   -4545.02    91.34835    5.57435 
## 12   -4545.01    91.37438    5.57124 
## 13   -4545.01    91.37754    5.57085 
## Log-likelihood ratio converged.
## 
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 69.17535
## logL: -4974.01940
## Running AI-REML algorithm ...
## Iter.    logL    V(e)    
## 1    -4939.88    69.14531    
## 2    -4939.88    69.12478    
## 3    -4939.88    69.08038    
## 4    -4939.88    69.08041    
## 5    -4939.88    69.08041    
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G) 91.377539   3.879798
## V(e) 5.570853    0.358140
## Vp   96.948392   3.837078
## V(G)/Vp  0.942538    0.004478
## 
## Sampling variance/covariance of the estimates of variance components:
## 1.505283e+01 -2.289649e-01   
## -2.289649e-01    1.282644e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.417530  8.736724
## X_2  -0.112660   0.022406
## X_3  15.076693   483.651444
## X_4  406.821756  292.025107
## X_5  54.259660   188.838053
## X_6  -65.133325  178.945021
## X_7  3.286590    0.720470
## X_8  -9.516487   0.565417
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS2.hsq].
## 
## Analysis finished at 00:48:55 CEST on Wed Jun 15 2022
## Overall computational time: 11.14 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:55 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs3.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS3
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs3.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs3.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.7761 34.2641
## logL: -4806.95
## Running AI-REML algorithm ...
## Iter.    logL    V(G)    V(e)    
## 1    -4747.50    59.39239    17.64269    
## 2    -4646.02    69.67637    11.07406    
## 3    -4590.07    75.97320    8.58117 
## 4    -4566.24    80.20843    7.42150 
## 5    -4555.84    83.24244    6.78120 
## 6    -4550.93    85.46101    6.39135 
## 7    -4548.50    87.09011    6.13976 
## 8    -4547.28    88.28472    5.97131 
## 9    -4546.65    89.15818    5.85577 
## 10   -4546.33    91.17320    5.60084 
## 11   -4545.98    91.44246    5.57847 
## 12   -4545.97    91.46926    5.57527 
## 13   -4545.97    91.47254    5.57487 
## Log-likelihood ratio converged.
## 
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 68.88252
## logL: -4971.07558
## Running AI-REML algorithm ...
## Iter.    logL    V(e)    
## 1    -4935.95    68.85209    
## 2    -4935.95    68.83129    
## 3    -4935.94    68.78631    
## 4    -4935.94    68.78634    
## 5    -4935.94    68.78634    
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G) 91.472544   3.884525
## V(e) 5.574867    0.358533
## Vp   97.047411   3.841601
## V(G)/Vp  0.942555    0.004479
## 
## Sampling variance/covariance of the estimates of variance components:
## 1.508953e+01 -2.300927e-01   
## -2.300927e-01    1.285462e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.639132  8.743245
## X_2  -0.112417   0.022418
## X_3  -16.914727  484.031778
## X_4  417.248000  292.245842
## X_5  47.967643   188.946734
## X_6  -54.884823  179.008572
## X_7  2.898535    0.664494
## X_8  -9.535677   0.565604
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS3.hsq].
## 
## Analysis finished at 00:49:06 CEST on Wed Jun 15 2022
## Overall computational time: 10.93 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:06 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs4.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS4
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs4.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs4.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.7703 34.2567
## logL: -4806.73
## Running AI-REML algorithm ...
## Iter.    logL    V(G)    V(e)    
## 1    -4747.20    59.39007    17.62675    
## 2    -4645.59    69.66570    11.06386    
## 3    -4589.65    75.95222    8.57561 
## 4    -4565.86    80.17987    7.41804 
## 5    -4555.48    83.20819    6.77879 
## 6    -4550.58    85.42239    6.38956 
## 7    -4548.16    87.04814    6.13836 
## 8    -4546.94    88.24021    5.97017 
## 9    -4546.31    89.11175    5.85481 
## 10   -4545.99    91.12224    5.60027 
## 11   -4545.65    91.39066    5.57795 
## 12   -4545.64    91.41736    5.57477 
## 13   -4545.64    91.42062    5.57436 
## Log-likelihood ratio converged.
## 
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 68.84960
## logL: -4970.79105
## Running AI-REML algorithm ...
## Iter.    logL    V(e)    
## 1    -4935.55    68.81912    
## 2    -4935.55    68.79829    
## 3    -4935.55    68.75324    
## 4    -4935.55    68.75327    
## 5    -4935.55    68.75327    
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G) 91.420622   3.882391
## V(e) 5.574364    0.358485
## Vp   96.994986   3.839481
## V(G)/Vp  0.942529    0.004480
## 
## Sampling variance/covariance of the estimates of variance components:
## 1.507296e+01 -2.299303e-01   
## -2.299303e-01    1.285118e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.212392  8.737739
## X_2  -0.112576   0.022412
## X_3  2.572887    483.807226
## X_4  412.614296  292.132395
## X_5  48.578056   188.894702
## X_6  -57.183480  178.965315
## X_7  2.801184    0.629326
## X_8  -9.528049   0.565490
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS4.hsq].
## 
## Analysis finished at 00:49:17 CEST on Wed Jun 15 2022
## Overall computational time: 10.97 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:17 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs5.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS5
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs5.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs5.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.7455 34.2568
## logL: -4806.24
## Running AI-REML algorithm ...
## Iter.    logL    V(G)    V(e)    
## 1    -4746.69    59.33900    17.65774    
## 2    -4645.51    69.62249    11.07936    
## 3    -4589.55    75.91415    8.58302 
## 4    -4565.71    80.14304    7.42229 
## 5    -4555.30    83.17158    6.78155 
## 6    -4550.39    85.38582    6.39149 
## 7    -4547.97    87.01165    6.13978 
## 8    -4546.74    88.20385    5.97127 
## 9    -4546.12    89.07556    5.85569 
## 10   -4545.79    91.08660    5.60067 
## 11   -4545.45    91.35558    5.57830 
## 12   -4545.44    91.38236    5.57510 
## 13   -4545.44    91.38565    5.57470 
## Log-likelihood ratio converged.
## 
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 68.71671
## logL: -4969.43466
## Running AI-REML algorithm ...
## Iter.    logL    V(e)    
## 1    -4933.73    68.68605    
## 2    -4933.73    68.66510    
## 3    -4933.73    68.61979    
## 4    -4933.73    68.61982    
## 5    -4933.73    68.61982    
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G) 91.385646   3.881152
## V(e) 5.574698    0.358524
## Vp   96.960344   3.838207
## V(G)/Vp  0.942505    0.004483
## 
## Sampling variance/covariance of the estimates of variance components:
## 1.506334e+01 -2.300251e-01   
## -2.300251e-01    1.285397e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.446708  8.737619
## X_2  -0.113225   0.022407
## X_3  -9.622066   483.773546
## X_4  412.972238  292.081569
## X_5  48.675267   188.861598
## X_6  -57.801962  178.935760
## X_7  2.763790    0.613928
## X_8  -9.544225   0.565300
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS5.hsq].
## 
## Analysis finished at 00:49:28 CEST on Wed Jun 15 2022
## Overall computational time: 10.91 sec.

Format GCTA outputs predictivness

# We can read in the gcta results
res <- as.data.frame(read.table("gcta_results.txt"))
names(res) <- c("Model", "beta", "se")

# To calculate R2 we scale beta into a marginal correlation
# and then square it We are calculating this and adding it
# to the res object
res$r2 <- rbind((res[1, 2]/sd(all$ht, na.rm = T) * sd(all$PRS1,
    na.rm = T))^2, (res[2, 2]/sd(all$ht, na.rm = T) * sd(all$PRS2,
    na.rm = T))^2, (res[3, 2]/sd(all$ht, na.rm = T) * sd(all$PRS3,
    na.rm = T))^2, (res[4, 2]/sd(all$ht, na.rm = T) * sd(all$PRS4,
    na.rm = T))^2, (res[5, 2]/sd(all$ht, na.rm = T) * sd(all$PRS5,
    na.rm = T))^2)


# To test the significance of the betas we can use t-tests
# We are calculating this and adding it to the res object
res$tStat <- res$beta/res$se  # Follows a student distribution
res$pval <- (1 - pt(q = res$tStat, df = 1887, lower.tail = T)) *
    2

# Now we can look at the results
res

Make barplots to summarise associations

r2 <- data.frame(name = c("5e-08", "1e-06", "0.0001", "0.01",
    "1"), R2 = c(1.236, 1.913, 2.218, 2.34, 2.561), P = c(0.000195,
    5.04e-06, 6.91e-07, 2.67e-07, 6.12e-08))
# Increase bottom margin
par(mar = c(4, 4, 4, 4))
# Barplot jpeg('height.R2.jpg', width = 800, height = 800,
# quality=75, bg = 'white', type = 'cairo')
my_bar <- barplot(r2$R2, border = F, names.arg = r2$name, las = 2,
    col = c("skyblue1", "skyblue2", "skyblue3", "skyblue4"),
    ylim = c(0, 3), main = "Predicting height from height PRS",
    ylab = "% variance explained", xlab = "PRS Thresholds")
# Add P values to the bars
text(my_bar, r2$R2 + 0.04, paste("P=", r2$P, sep = ""), cex = 1)

# dev.off()

# Next we'll plot height by quintile of PRS (often these
# plots are made with deciles but the sample is small so
# we're going to use quintiles here)
library(Hmisc)
library(gplots)
# the following command divides the data into quintiles.
longData$percentiles <- factor(cut2(longData$PRS5, g = 5, levels.mean = TRUE))

# Plot the mean of height by precentile with 95% CIs
# jpeg('Quintile.jpg', width = 800, height = 800,
# quality=75, bg = 'white', type = 'cairo')
plotmeans(ht ~ percentiles, data = longData, xlab = "Quintile of PRS",
    ylab = "Height in Cm", legends = c("1", "2", "3", "4", "5"))

# dev.off()

Optional analysis - adapt code to fit ACE model in GCTA

Make matrix of variance covariance for the shared environment

# We can make the CRM by modyfing the GRM
hist(GRM$off[which(GRM$off > 0.2)], breaks = 100)  # Remember that the sample contains MZ and DZ twins (or siblings)

# Format GRM into a NxN matrix (simpler to write later)
asNxNGRM <- function(BRMbin) {
    mat <- matrix(0, nrow = length(BRMbin$diag), ncol = length(BRMbin$diag))
    mat[upper.tri(mat)] <- BRMbin$off
    mat <- mat + t(mat)
    diag(mat) <- BRMbin$diag
    colnames(mat) <- BRMbin$id[, 2]
    rownames(mat) <- BRMbin$id[, 2]
    return(mat)
}

# convert GRM into NxN matrix
GRMmat = asNxNGRM(GRM)

# We can build the CRM as a modified GRM
CRM = GRMmat
CRM[which(CRM > 0.2, arr.ind = T)] = 1  # we assume that individuals with GRM>0.2 have grown up together -> C=1
CRM[which(CRM < 0.2, arr.ind = T)] = 0  # other less related individuals have no shared environment -> C=0

# Check how many 1 and 0 we have
table(CRM[upper.tri(CRM)])  # 978 families

## 
##       0       1 
## 1911012     978

table(diag(CRM))  # check diagonal

## 
##    1 
## 1956

# We get the info about family ID and IID - needed to
# create the .grm.id file
fam = GRM$id
colnames(fam) = c("fam", "id")

# We get this package which allows writing a GRM
# install.packages('genio')
library(genio)  # needed to write_grm

genio::write_grm(kinship = CRM, name = "ozCRM", fam = fam)

Fit several random effects in GCTA


# We create this text file, which stores the name of the different GRM/CRM matrices so GCTA knows which to open
echo "ozGRM" > GRM-CRM.txt
echo "ozCRM" >> GRM-CRM.txt
head GRM-CRM.txt

./gcta64 --pheno ozht.phen --qcovar ozht.prs1.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS1.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs2.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS2.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs3.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS3.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs4.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS4.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs5.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS5.ACE --reml --reml-est-fix

grep X_7 *ACE.log > gcta_results_ACE.txt

## ozGRM
## ozCRM
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:34 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs1.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS1.ACE
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs1.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs1.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.9405 29.1033  24.645
## logL: -4736.31
## Running AI-REML algorithm ...
## Iter.    logL    V(G1)   V(G2)   V(e)    
## 1    -4700.32    50.36555    18.19224    13.55946    
## 2    -4611.85    61.35311    13.83297    9.51921 
## 3    -4573.17    67.87119    11.53647    7.88367 
## 4    -4557.75    72.23652    10.14153    7.04864 
## 5    -4550.93    75.29060    9.25897 6.56151 
## 6    -4547.70    77.45579    8.69594 6.25505 
## 7    -4546.10    78.99608    8.33749 6.05316 
## 8    -4545.30    80.09254    8.11040 5.91617 
## 9    -4544.89    82.56309    7.65749 5.61614 
## 10   -4544.46    82.77717    7.74327 5.59209 
## 11   -4544.46    82.82704    7.72411 5.58854 
## 12   -4544.46    82.82629    7.72855 5.58805 
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G1)    82.826286   5.203826
## V(G2)    7.728551    3.584645
## V(e) 5.588054    0.359747
## Vp   96.142891   3.815144
## V(G1)/Vp 0.861492    0.037840
## V(G2)/Vp 0.080386    0.037338
## 
## Sum of V(G)/Vp   0.941878    0.004543
## 
## Sampling variance/covariance of the estimates of variance components:
## 2.707981e+01 -1.251748e+01   -2.574890e-01   
## -1.251748e+01    1.284968e+01    2.317447e-02    
## -2.574890e-01    2.317447e-02    1.294179e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 169.255966  8.892221
## X_2  -0.112214   0.023057
## X_3  -49.312966  489.526725
## X_4  414.675175  296.962542
## X_5  39.556627   191.180832
## X_6  -31.085455  181.347441
## X_7  3.071223    0.804539
## X_8  -9.529359   0.582336
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS1.ACE.hsq].
## 
## Analysis finished at 00:49:43 CEST on Wed Jun 15 2022
## Overall computational time: 9.18 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:43 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs2.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS2.ACE
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs2.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs2.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8995 29.0562 24.6377
## logL: -4733.47
## Running AI-REML algorithm ...
## Iter.    logL    V(G1)   V(G2)   V(e)    
## 1    -4697.25    50.24276    18.04696    13.61573    
## 2    -4609.52    61.21929    13.64557    9.56059 
## 3    -4570.92    67.72897    11.33108    7.91190 
## 4    -4555.47    72.08346    9.92817 7.06959 
## 5    -4548.62    75.12825    9.04166 6.57814 
## 6    -4545.34    77.28681    8.47639 6.26889 
## 7    -4543.77    78.82292    8.11646 6.06511 
## 8    -4542.96    79.91706    7.88831 5.92680 
## 9    -4542.55    82.38431    7.43282 5.62374 
## 10   -4542.11    82.60353    7.51772 5.59911 
## 11   -4542.11    82.65447    7.49848 5.59543 
## 12   -4542.11    82.65387    7.50293 5.59492 
## 13   -4542.11    82.65550    7.50186 5.59486 
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G1)    82.655495   5.190102
## V(G2)    7.501856    3.565254
## V(e) 5.594857    0.360340
## Vp   95.752208   3.801264
## V(G1)/Vp 0.863223    0.037804
## V(G2)/Vp 0.078347    0.037294
## 
## Sum of V(G)/Vp   0.941569    0.004571
## 
## Sampling variance/covariance of the estimates of variance components:
## 2.693716e+01 -1.242863e+01   -2.588074e-01   
## -1.242863e+01    1.271104e+01    2.321948e-02    
## -2.588074e-01    2.321948e-02    1.298450e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.624485  8.862613
## X_2  -0.110984   0.022994
## X_3  -17.724628  488.242119
## X_4  409.842478  296.195656
## X_5  43.235878   190.742260
## X_6  -55.042363  180.962099
## X_7  3.174626    0.717059
## X_8  -9.508999   0.580801
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS2.ACE.hsq].
## 
## Analysis finished at 00:49:53 CEST on Wed Jun 15 2022
## Overall computational time: 10.07 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:53 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs3.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS3.ACE
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs3.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs3.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8736 29.0475 24.6513
## logL: -4732.9
## Running AI-REML algorithm ...
## Iter.    logL    V(G1)   V(G2)   V(e)    
## 1    -4696.82    50.02688    18.17504    13.71829    
## 2    -4610.66    61.02046    13.79117    9.62093 
## 3    -4572.02    67.54188    11.49160    7.94854 
## 4    -4556.43    71.90039    10.10100    7.09463 
## 5    -4549.51    74.94802    9.22342 6.59669 
## 6    -4546.23    77.10985    8.66428 6.28347 
## 7    -4544.60    78.64964    8.30847 6.07709 
## 8    -4543.78    79.74753    8.08303 5.93699 
## 9    -4543.36    82.22597    7.63312 5.62996 
## 10   -4542.92    82.45294    7.71726 5.60473 
## 11   -4542.91    82.50486    7.69811 5.60091 
## 12   -4542.91    82.50445    7.70257 5.60038 
## 13   -4542.91    82.50611    7.70149 5.60032 
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G1)    82.506111   5.184994
## V(G2)    7.701490    3.567178
## V(e) 5.600317    0.360871
## Vp   95.807918   3.804076
## V(G1)/Vp 0.861162    0.037797
## V(G2)/Vp 0.080385    0.037287
## 
## Sum of V(G)/Vp   0.941546    0.004575
## 
## Sampling variance/covariance of the estimates of variance components:
## 2.688416e+01 -1.239702e+01   -2.608549e-01   
## -1.239702e+01    1.272476e+01    2.379687e-02    
## -2.608549e-01    2.379687e-02    1.302279e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.920867  8.871820
## X_2  -0.110746   0.023018
## X_3  -52.818537  488.707817
## X_4  419.092591  296.477111
## X_5  36.334022   190.873399
## X_6  -44.340405  181.048200
## X_7  2.813803    0.661036
## X_8  -9.527184   0.581329
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS3.ACE.hsq].
## 
## Analysis finished at 00:50:03 CEST on Wed Jun 15 2022
## Overall computational time: 9.54 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:50:03 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs4.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS4.ACE
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs4.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs4.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8714 29.0453 24.6475
## logL: -4732.7
## Running AI-REML algorithm ...
## Iter.    logL    V(G1)   V(G2)   V(e)    
## 1    -4696.57    50.05982    18.14688    13.69875    
## 2    -4610.19    61.03608    13.76834    9.61007 
## 3    -4571.60    67.54205    11.47352    7.94229 
## 4    -4556.05    71.88905    10.08655    7.09055 
## 5    -4549.15    74.92784    9.21178 6.59380 
## 6    -4545.87    77.08286    8.65476 6.28129 
## 7    -4544.25    78.61747    8.30052 6.07537 
## 8    -4543.44    79.71148    8.07621 5.93558 
## 9    -4543.02    82.18079    7.62885 5.62923 
## 10   -4542.58    82.40633    7.71308 5.60410 
## 11   -4542.57    82.45812    7.69390 5.60030 
## 12   -4542.57    82.45766    7.69839 5.59977 
## 13   -4542.57    82.45932    7.69729 5.59971 
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G1)    82.459325   5.181032
## V(G2)    7.697294    3.563947
## V(e) 5.599706    0.360812
## Vp   95.756325   3.801953
## V(G1)/Vp 0.861137    0.037783
## V(G2)/Vp 0.080384    0.037273
## 
## Sum of V(G)/Vp   0.941521    0.004577
## 
## Sampling variance/covariance of the estimates of variance components:
## 2.684309e+01 -1.237322e+01   -2.605583e-01   
## -1.237322e+01    1.270172e+01    2.370034e-02    
## -2.605583e-01    2.370034e-02    1.301853e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.476664  8.865994
## X_2  -0.110867   0.023012
## X_3  -35.580668  488.477468
## X_4  415.705484  296.370772
## X_5  36.669484   190.821852
## X_6  -47.537680  181.007689
## X_7  2.723517    0.626279
## X_8  -9.520307   0.581206
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS4.ACE.hsq].
## 
## Analysis finished at 00:50:12 CEST on Wed Jun 15 2022
## Overall computational time: 8.75 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:50:12 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
## 
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs5.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS5.ACE
## --reml
## --reml-est-fix
## 
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
## 
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs5.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs5.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
## 
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
## 
## Performing  REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8578 29.0401 24.6464
## logL: -4732.1
## Running AI-REML algorithm ...
## Iter.    logL    V(G1)   V(G2)   V(e)    
## 1    -4695.94    49.99573    18.17732    13.71870    
## 2    -4609.97    60.96663    13.80366    9.62128 
## 3    -4571.39    67.46612    11.51264    7.94896 
## 4    -4555.83    71.80684    10.12860    7.09504 
## 5    -4548.92    74.84089    9.25582 6.59709 
## 6    -4545.63    76.99269    8.70006 6.28385 
## 7    -4544.02    78.52530    8.34655 6.07745 
## 8    -4543.20    79.61813    8.12264 5.93734 
## 9    -4542.78    82.08548    7.67591 5.63026 
## 10   -4542.34    82.31263    7.75955 5.60502 
## 11   -4542.33    82.36451    7.74045 5.60120 
## 12   -4542.33    82.36411    7.74491 5.60067 
## 13   -4542.33    82.36578    7.74383 5.60060 
## Log-likelihood ratio converged.
## 
## Summary result of REML analysis:
## Source   Variance    SE
## V(G1)    82.365776   5.177148
## V(G2)    7.743826    3.562831
## V(e) 5.600599    0.360904
## Vp   95.710201   3.800263
## V(G1)/Vp 0.860575    0.037789
## V(G2)/Vp 0.080909    0.037278
## 
## Sum of V(G)/Vp   0.941484    0.004581
## 
## Sampling variance/covariance of the estimates of variance components:
## 2.680287e+01 -1.235535e+01   -2.610650e-01   
## -1.235535e+01    1.269376e+01    2.397526e-02    
## -2.610650e-01    2.397526e-02    1.302519e-01    
## Estimatesof fixed effects:
## 
## Source   Estimate    SE
## mean 168.700024  8.866271
## X_2  -0.111501   0.023010
## X_3  -48.135784  488.464289
## X_4  416.564315  296.337132
## X_5  36.881111   190.792892
## X_6  -48.392397  180.984907
## X_7  2.694306    0.611060
## X_8  -9.536273   0.581096
## 
## Summary result of REML analysis has been saved in the file [ozhtPRS5.ACE.hsq].
## 
## Analysis finished at 00:50:21 CEST on Wed Jun 15 2022
## Overall computational time: 9.62 sec.

Format results

# We can read in the gcta results
res <- as.data.frame(read.table("gcta_results_ACE.txt"))
names(res) <- c("Model", "beta", "se")

# To calculate R2 we scale beta into a marginal correlation
# and then square it We are calculating this and adding it
# to the res object
res$r2 <- rbind((res[1, 2]/sd(all$ht, na.rm = T) * sd(all$PRS1,
    na.rm = T))^2, (res[2, 2]/sd(all$ht, na.rm = T) * sd(all$PRS2,
    na.rm = T))^2, (res[3, 2]/sd(all$ht, na.rm = T) * sd(all$PRS3,
    na.rm = T))^2, (res[4, 2]/sd(all$ht, na.rm = T) * sd(all$PRS4,
    na.rm = T))^2, (res[5, 2]/sd(all$ht, na.rm = T) * sd(all$PRS5,
    na.rm = T))^2)


# To test the significance of the betas we can use t-tests
# We are calculating this and adding it to the res object
res$tStat <- res$beta/res$se  # Follows a student distribution
res$pval <- (1 - pt(q = res$tStat, df = 1887, lower.tail = T)) *
    2

# Now we can look at the results
res

A work by Sarah Medland & Baptiste Couvy-Duchesne

baptiste.couvyduchesne@gmail.com

Day 7 ISGW workshop tutorial

Sarah Medland & Baptiste Couvy-Duchesne - 15 June 2022

Setting up environment

PRS calculation using PRSice

Open tables and merge

Format data - to one row per family (long -> wide)

OpenMx model

Calculate GRM using GCTA

Open GRM and draw some plots

Write files for GCTA analysis

Run GCTA with fixed and random effect (AE model)

Format GCTA outputs predictivness

Make barplots to summarise associations

Optional analysis - adapt code to fit ACE model in GCTA

Make matrix of variance covariance for the shared environment

Fit several random effects in GCTA

Format results