Answer sheet for the tutorial
mkdir -p day7
cd day7
cp -r /faculty/sarah/2022/Day7/* .
# PRSice code - run this in the ssh terminal window
Rscript /usr/local/bin/PRSice.R --dir . --prsice /usr/local/bin/PRSice --base weights.prs --target ozbmi --thread 1 --snp SNP --chr CHR --bp BP --A1 A1 --A2 A2 --stat BETA --pvalue P --beta --pheno ozht.pheno --score sum --all-score --bar-levels 0.00000005,0.000001,0.0001,0.01,1 --fastscore --quantile 5 --quant-ref 1 --binary-target F --no-regress
<- read.table("ozht.pheno", header = T)
pheno <- read.table("ozht.cov", header = T)
covar <- read.table("PRSice.all_score", header = T)
PRS
head(PRS)
names(PRS) <- c("FID", "IID", "PRS1", "PRS2", "PRS3", "PRS4",
"PRS5")
head(PRS)
$ht <- pheno$ht * 100
pheno
# Best to use names instead of column number
= merge(pheno, covar[, 2:8], by = "IID")
temp = merge(temp, PRS[, 2:7], by = "IID")
longData
# Run models
<- lm(ht ~ age + sex + PC1 + PC2 + PC3 + PC4, data = longData)
nullModel summary(nullModel)
##
## Call:
## lm(formula = ht ~ age + sex + PC1 + PC2 + PC3 + PC4, data = longData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.7268 -6.0023 -0.2827 5.3050 30.4879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 173.50935 6.65163 26.085 < 2e-16 ***
## age -0.10116 0.01697 -5.961 2.99e-09 ***
## sex -9.80000 0.43327 -22.619 < 2e-16 ***
## PC1 744.83641 373.94861 1.992 0.0465 *
## PC2 240.08216 230.12319 1.043 0.2970
## PC3 -97.14299 151.21233 -0.642 0.5207
## PC4 54.75009 145.82996 0.375 0.7074
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.412 on 1887 degrees of freedom
## (38 observations deleted due to missingness)
## Multiple R-squared: 0.2296, Adjusted R-squared: 0.2271
## F-statistic: 93.71 on 6 and 1887 DF, p-value: < 2.2e-16
# Then we are going to run a model that includes the
# covariates and the first PRS
<- lm(ht ~ age + sex + PC1 + PC2 + PC3 + PC4 + PRS1,
thresh1 data = longData)
summary(thresh1)
##
## Call:
## lm(formula = ht ~ age + sex + PC1 + PC2 + PC3 + PC4 + PRS1, data = longData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.6273 -5.9917 -0.2311 5.5705 29.9557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 175.28775 6.60888 26.523 < 2e-16 ***
## age -0.10084 0.01684 -5.988 2.54e-09 ***
## sex -9.76070 0.43002 -22.698 < 2e-16 ***
## PC1 667.06438 371.36819 1.796 0.0726 .
## PC2 266.81843 228.42069 1.168 0.2429
## PC3 -96.05118 150.05960 -0.640 0.5222
## PC4 35.77279 144.75946 0.247 0.8048
## PRS1 3.11956 0.56855 5.487 4.64e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.348 on 1886 degrees of freedom
## (38 observations deleted due to missingness)
## Multiple R-squared: 0.2417, Adjusted R-squared: 0.2389
## F-statistic: 85.86 on 7 and 1886 DF, p-value: < 2.2e-16
# We can compute the difference in R2 between the 2 models
# using the following code
summary(thresh1)$r.squared - summary(nullModel)$r.squared
## [1] 0.01210506
<- read.table("ozht.zyg", header = T)
zyg
# We will then merge on the zyg data using the individual
# ID 'IID' as the key.
= merge(longData, zyg[, 2:5], by = "IID")
longData1
# Take a look at the result to check that it worked
head(longData1)
# For openMx we need our data to be in a wide format with
# one line per family so we will need to reshape the data.
# In this code the variables listed in v.names are present
# for both twins The variable Twin specifies if the person
# is twin 1 or 2
= reshape(data = longData1, direction = "wide", v.names = c("IID",
wideData "ht", "age", "sex", "PC1", "PC2", "PC3", "PC4", "PRS1", "PRS2",
"PRS3", "PRS4", "PRS5"), idvar = "FID", timevar = "Twin",
sep = "_")
# We will replace all missingness in the definition
# variables with 0 the people with missing definition
# variables don't have phenotypic data so this will not
# bais estimates
c(6:12, 19:25)][is.na(wideData[, c(6:12, 19:25)])] <- 0 wideData[,
library(OpenMx)
source("miFunctions5272022.R")
# Select Variables for Analysis
<- "ht" # list of variables names
vars <- 1 # number of variables
nv <- nv * 2 # number of total variables
ntv <- c("ht_1", "ht_2") # dependent variables
depVar <- c("age_1", "sex_1", "PC1_1", "PC2_1", "PC3_1", "PC4_1",
indVar "PRS1_1", "age_2", "sex_2", "PC1_2", "PC2_2", "PC3_2", "PC4_2",
"PRS1_2") # independent variables or predictors
<- c(depVar, indVar)
selVars
# Select Data for Analysis
<- subset(wideData, MZDZ == 1, selVars)
mzData <- subset(wideData, MZDZ == 2, selVars)
dzData
# Set Starting Values
<- 170 # start value for means
svMe <- 100 # start value for variance
svVa <- 1e-05 # lower bound for variance
lbVa <- 1e-04 # start value for means
svBe
# Create Data Objects for Multiple Groups
<- mxData(observed = mzData, type = "raw")
dataMZ <- mxData(observed = dzData, type = "raw")
dataDZ
# Objects to hold definition variables for the regression
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dPC1 labels = c("data.PC1_1", "data.PC1_2"), name = "dPC1")
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dPC2 labels = c("data.PC2_1", "data.PC2_2"), name = "dPC2")
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dPC3 labels = c("data.PC3_1", "data.PC3_2"), name = "dPC3")
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dPC4 labels = c("data.PC4_1", "data.PC4_2"), name = "dPC4")
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dsex labels = c("data.sex_1", "data.sex_2"), name = "dsex")
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dage labels = c("data.age_1", "data.age_2"), name = "dage")
<- mxMatrix(type = "Full", nrow = 1, ncol = 2, free = FALSE,
dPRS labels = c("data.PRS1_1", "data.PRS1_2"), name = "dPRS")
# Objects to hold betas for the covariates
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bPC1 values = svBe, label = "b1", name = "bPC1")
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bPC2 values = svBe, label = "b2", name = "bPC2")
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bPC3 values = svBe, label = "b3", name = "bPC3")
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bPC4 values = svBe, label = "b4", name = "bPC4")
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bsex values = svBe, label = "b5", name = "bsex")
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bage values = svBe, label = "b6", name = "bage")
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
bPRS values = svBe, label = "b7", name = "bPRS")
# Then we will build the model for the means that includes
# the regression tems (we are using Kronecker products here
# to mutliply the values of the independent variables by
# the betas)
<- mxMatrix(type = "Full", nrow = 1, ncol = 1, free = TRUE,
meanG values = svMe, labels = labVars("mean", vars), name = "meanG")
<- mxAlgebra(expression = (meanG + bPC1 %x% dPC1 + bPC2 %x%
ExpMean + bPC3 %x% dPC3 + bPC4 %x% dPC4 + bsex %x% dsex + bage %x%
dPC2 + bPRS %x% dPRS), name = "expMean")
dage
# and do some house keeping (making a list of the objects
# so we can pull these into the model later)
<- c(bPC1, bPC2, bPC3, bPC4, bsex, bage, bPRS, meanG)
pars <- c(dPC1, dPC2, dPC3, dPC4, dsex, dage, dPRS)
defs
# Create Algebra for expected Variance/Covariance Matrices
<- mxMatrix(type = "Symm", nrow = ntv, ncol = ntv, free = TRUE,
covMZ values = c(0.1, 0.05, 0.1), lbound = valDiag(lbVa, ntv),
labels = c("var", "cMZ21", "var"), name = "covMZ")
<- mxMatrix(type = "Symm", nrow = ntv, ncol = ntv, free = TRUE,
covDZ values = c(0.1, 0.05, 0.1), lbound = valDiag(lbVa, ntv),
labels = c("var", "cDZ21", "var"), name = "covDZ")
# Next we build the expection and model objects Create
# Expectation Objects for Multiple Groups
<- mxExpectationNormal(covariance = "covMZ", means = "expMean",
expMZ dimnames = depVar)
<- mxExpectationNormal(covariance = "covDZ", means = "expMean",
expDZ dimnames = depVar)
<- mxFitFunctionML()
funML
# Create Model Objects for Multiple Groups
<- mxModel(pars, defs, ExpMean, covMZ, dataMZ, expMZ,
modelMZ name = "MZ")
funML, <- mxModel(pars, defs, ExpMean, covDZ, dataDZ, expDZ,
modelDZ name = "DZ")
funML, <- mxFitFunctionMultigroup(c("MZ", "DZ"))
multi
# Build Saturated Model
<- mxModel("wPRS", pars, modelMZ, modelDZ, multi)
model_wPRS
# Run model
<- mxRun(model_wPRS)
fit_wPRS <- summary(fit_wPRS)) (sum_wPRS
## Summary of wPRS
##
## free parameters:
## name matrix row col Estimate Std.Error A lbound ubound
## 1 b1 bPC1 1 1 494.07780228 460.780896671
## 2 b2 bPC2 1 1 376.45026775 275.668856990
## 3 b3 bPC3 1 1 -52.34236364 170.976227797
## 4 b4 bPC4 1 1 52.11647033 164.470117427
## 5 b5 bsex 1 1 -8.91504139 0.557884837
## 6 b6 bage 1 1 -0.11091751 0.021763799
## 7 b7 bPRS 1 1 2.58520600 0.690176704
## 8 meanht meanG 1 1 173.54694174 8.320062467
## 9 var MZ.covMZ ht_1 ht_1 74.34378646 2.844644713 1e-05
## 10 cMZ21 MZ.covMZ ht_1 ht_2 69.45798062 2.857977923 0
## 11 cDZ21 DZ.covDZ ht_1 ht_2 16.93968677 3.188544188 0
##
## Model Statistics:
## | Parameters | Degrees of Freedom | Fit (-2lnL units)
## Model: 11 1883 12435.322
## Saturated: NA NA NA
## Independence: NA NA NA
## Number of observations/statistics: 969/1894
##
## Information Criteria:
## | df Penalty | Parameters Penalty | Sample-Size Adjusted
## AIC: 8669.32237 12457.322 12457.598
## BIC: -512.68389 12510.961 12476.025
## CFI: NA
## TLI: 1 (also known as NNFI)
## RMSEA: 0 [95% CI (NA, NA)]
## Prob(RMSEA <= 0.05): NA
## To get additional fit indices, see help(mxRefModels)
## timestamp: 2022-06-15 00:48:26
## Wall clock time: 4.534725 secs
## optimizer: NPSOL
## OpenMx version number: 2.19.5.1
## Need help? See help(mxSummary)
# And then run a submodel whic drops the PRS effect from
# the model Drop the PRS from the model (running the null
# model)
<- mxModel(fit_wPRS, name = "noPRS")
model_Null <- omxSetParameters(model_Null, labels = "b7", free = FALSE,
model_Null values = 0)
<- mxRun(model_Null)
fit_Null <- summary(fit_Null)) (sum_Null
## Summary of noPRS
##
## The final iterate satisfies the optimality conditions to the accuracy requested, but the sequence of iterates has not yet converged. Optimizer was terminated because no further improvement could be made in the merit function (Mx status GREEN).
##
## free parameters:
## name matrix row col Estimate Std.Error A lbound ubound
## 1 b1 bPC1 1 1 596.9028672 444.656189378
## 2 b2 bPC2 1 1 341.5478088 279.361150612
## 3 b3 bPC3 1 1 -60.4220028 141.517064862
## 4 b4 bPC4 1 1 83.9433795 191.642823752
## 5 b5 bsex 1 1 -8.9563185 0.561653963
## 6 b6 bage 1 1 -0.1107354 0.021912748
## 7 meanht meanG 1 1 172.2274408 8.538805730
## 8 var MZ.covMZ ht_1 ht_1 75.2064382 2.879449459 1e-05
## 9 cMZ21 MZ.covMZ ht_1 ht_2 70.3274383 2.892338544 0
## 10 cDZ21 DZ.covDZ ht_1 ht_2 17.6870557 3.240011731 ! 0
##
## Model Statistics:
## | Parameters | Degrees of Freedom | Fit (-2lnL units)
## Model: 10 1884 12449.195
## Saturated: NA NA NA
## Independence: NA NA NA
## Number of observations/statistics: 969/1894
##
## Information Criteria:
## | df Penalty | Parameters Penalty | Sample-Size Adjusted
## AIC: 8681.1947 12469.195 12469.424
## BIC: -505.6878 12517.957 12486.197
## CFI: NA
## TLI: 1 (also known as NNFI)
## RMSEA: 0 [95% CI (NA, NA)]
## Prob(RMSEA <= 0.05): NA
## To get additional fit indices, see help(mxRefModels)
## timestamp: 2022-06-15 00:48:29
## Wall clock time: 2.568562 secs
## optimizer: NPSOL
## OpenMx version number: 2.19.5.1
## Need help? See help(mxSummary)
# We can work out the significance of the PRS effect by
# comparing the fit of the models
mxCompare(fit_wPRS, fit_Null)
## base comparison ep minus2LL df AIC diffLL diffdf p
## 1 wPRS <NA> 11 12435.322 1883 12457.322 NA NA NA
## 2 wPRS noPRS 10 12449.195 1884 12469.195 13.872356 1 0.00019565525
# and calculate r2 as the difference in variance between
# the models gives variance explained by PRS, divided by
# total variance
$MZ$covMZ$values[1, 2] - fit_wPRS$MZ$covMZ$values[1,
(fit_Null2])/fit_Null$MZ$covMZ$values[1, 2]
## [1] 0.012362994
./gcta64 --bfile ozbmi --make-grm --out ozGRM
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:29 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Options:
##
## --bfile ozbmi
## --make-grm
## --out ozGRM
##
## Note: GRM is computed using the SNPs on the autosomes.
## Reading PLINK FAM file from [ozbmi.fam]...
## 1956 individuals to be included from FAM file.
## 1956 individuals to be included. 651 males, 1305 females, 0 unknown.
## Reading PLINK BIM file from [ozbmi.bim]...
## 9758 SNPs to be included from BIM file(s).
## Computing the genetic relationship matrix (GRM) v2 ...
## Subset 1/1, no. subject 1-1956
## 1956 samples, 9758 markers, 1913946 GRM elements
## IDs for the GRM file have been saved in the file [ozGRM.grm.id]
## Computing GRM...
## 100% finished in 2.1 sec
## 9758 SNPs have been processed.
## Used 9718 valid SNPs.
## The GRM computation is completed.
## Saving GRM...
## GRM has been saved in the file [ozGRM.grm.bin]
## Number of SNPs in each pair of individuals has been saved in the file [ozGRM.grm.N.bin]
##
## Analysis finished at 00:48:31 CEST on Wed Jun 15 2022
## Overall computational time: 2.22 sec.
# Open GRM and visualise off-diagonal distribution
= function(prefix, AllN = F, size = 4) {
ReadGRMBin = function(i) {
sum_i return(sum(1:i))
}= paste(prefix, ".grm.bin", sep = "")
BinFileName = paste(prefix, ".grm.N.bin", sep = "")
NFileName = paste(prefix, ".grm.id", sep = "")
IDFileName = read.table(IDFileName)
id = dim(id)[1]
n = file(BinFileName, "rb")
BinFile = readBin(BinFile, n = n * (n + 1)/2, what = numeric(0),
grm size = size)
= file(NFileName, "rb")
NFile if (AllN == T) {
= readBin(NFile, n = n * (n + 1)/2, what = numeric(0),
N size = size)
else N = readBin(NFile, n = 1, what = numeric(0), size = size)
} = sapply(1:n, sum_i)
i return(list(diag = grm[i], off = grm[-i], id = id, N = N))
}
= ReadGRMBin(prefix = "ozGRM")
GRM
hist(GRM$diag, breaks = 100) # Large values can indicate data problems or outstanding homozygosity rate
#
hist(GRM$off, breaks = 100)
# This plot isn't very informative as most relationship
# coefficents are close to 0. We can update our plot to
# only show relationship coefficents greater than 0.05
# using the following code
hist(GRM$off[which(GRM$off > 0.05)], breaks = 100)
# This plot clearly shows we have a lot of siblings and MZ
# twin pairs. If we zoom in even further we might spot
# those cousins
hist(GRM$off[which(GRM$off > 0.05)], breaks = 100, xlim = c(0.05,
0.3))
= read.table("PRSice.all_score", header = T)
prs <- read.table("ozht.cov", header = T)
covar <- read.table("ozht.pheno", header = T)
pheno
# Then rescaling and renaming
$ht <- pheno$ht * 100
phenonames(prs) <- c("FID", "IID", "PRS1", "PRS2", "PRS3", "PRS4",
"PRS5")
# Then merging the data into 1 dataframe
= merge(covar, prs, by = c("IID", "FID"))
all = merge(all, pheno, by = c("IID", "FID"))
all
# Then we'll write out the covariate files for the
# continuous covariates that we want to include in the GCTA
# model It's important that the PRS is in the last column
# of this file
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
"PC4", "PRS1")], "ozht.prs1.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
"PC4", "PRS2")], "ozht.prs2.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
"PC4", "PRS3")], "ozht.prs3.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
"PC4", "PRS4")], "ozht.prs4.qcovar", quote = F, row.names = F)
write.table(all[, c("FID", "IID", "age", "PC1", "PC2", "PC3",
"PC4", "PRS5")], "ozht.prs5.qcovar", quote = F, row.names = F)
# Then we'll write out the covariate files for the binary
# covariates that we want to include in the GCTA model
write.table(all[, c("FID", "IID", "sex")], "ozht.covar", quote = F,
row.names = F)
# Then we'll write out the phenotype files for the GCTA
# model
write.table(all[, c("FID", "IID", "ht")], "ozht.phen", quote = F,
row.names = F)
./gcta64 --pheno ozht.phen --qcovar ozht.prs1.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS1 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs2.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS2 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs3.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS3 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs4.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS4 --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs5.qcovar --covar ozht.covar --grm ozGRM --out ozhtPRS5 --reml --reml-est-fix
#GCTA will make a .log file and an .hsq file for each model.
#The beta and SE of the PRS effect can be found in the line labeled X_7 in the log files.
#We can pull these out of the log files and save them to a file with the following code
grep X_7 *.log > gcta_results.txt
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:33 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs1.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS1
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs1.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs1.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.9528 34.2896
## logL: -4810.94
## Running AI-REML algorithm ...
## Iter. logL V(G) V(e)
## 1 -4751.85 59.84011 17.40093
## 2 -4647.03 70.13324 10.91462
## 3 -4590.77 76.39449 8.49173
## 4 -4567.32 80.61353 7.36394
## 5 -4557.13 83.63826 6.74011
## 6 -4552.33 85.84919 6.35988
## 7 -4549.96 87.47110 6.11437
## 8 -4548.77 88.65891 5.94998
## 9 -4548.16 89.52614 5.83726
## 10 -4547.85 91.52381 5.58866
## 11 -4547.51 91.78332 5.56727
## 12 -4547.51 91.80849 5.56429
## 13 -4547.51 91.81150 5.56392
## Log-likelihood ratio converged.
##
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 69.78139
## logL: -4980.18681
## Running AI-REML algorithm ...
## Iter. logL V(e)
## 1 -4948.05 69.75216
## 2 -4948.05 69.73219
## 3 -4948.05 69.68899
## 4 -4948.05 69.68902
## 5 -4948.05 69.68902
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G) 91.811500 3.895050
## V(e) 5.563916 0.357498
## Vp 97.375416 3.852776
## V(G)/Vp 0.942861 0.004449
##
## Sampling variance/covariance of the estimates of variance components:
## 1.517142e+01 -2.276706e-01
## -2.276706e-01 1.278046e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 169.095957 8.764314
## X_2 -0.113977 0.022456
## X_3 -17.551326 484.849288
## X_4 410.861460 292.698935
## X_5 51.660560 189.243241
## X_6 -41.123137 179.306181
## X_7 3.182480 0.808135
## X_8 -9.535405 0.566582
##
## Summary result of REML analysis has been saved in the file [ozhtPRS1.hsq].
##
## Analysis finished at 00:48:44 CEST on Wed Jun 15 2022
## Overall computational time: 11.16 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:44 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs2.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS2
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs2.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs2.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.828 34.2361
## logL: -4807.36
## Running AI-REML algorithm ...
## Iter. logL V(G) V(e)
## 1 -4747.62 59.53572 17.48921
## 2 -4644.39 69.76967 10.98529
## 3 -4588.49 76.01234 8.53453
## 4 -4564.96 80.21423 7.39286
## 5 -4554.71 83.22495 6.76154
## 6 -4549.88 85.42585 6.37683
## 7 -4547.49 87.04118 6.12843
## 8 -4546.29 88.22501 5.96209
## 9 -4545.67 89.09008 5.84799
## 10 -4545.36 91.08456 5.59628
## 11 -4545.02 91.34835 5.57435
## 12 -4545.01 91.37438 5.57124
## 13 -4545.01 91.37754 5.57085
## Log-likelihood ratio converged.
##
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 69.17535
## logL: -4974.01940
## Running AI-REML algorithm ...
## Iter. logL V(e)
## 1 -4939.88 69.14531
## 2 -4939.88 69.12478
## 3 -4939.88 69.08038
## 4 -4939.88 69.08041
## 5 -4939.88 69.08041
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G) 91.377539 3.879798
## V(e) 5.570853 0.358140
## Vp 96.948392 3.837078
## V(G)/Vp 0.942538 0.004478
##
## Sampling variance/covariance of the estimates of variance components:
## 1.505283e+01 -2.289649e-01
## -2.289649e-01 1.282644e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.417530 8.736724
## X_2 -0.112660 0.022406
## X_3 15.076693 483.651444
## X_4 406.821756 292.025107
## X_5 54.259660 188.838053
## X_6 -65.133325 178.945021
## X_7 3.286590 0.720470
## X_8 -9.516487 0.565417
##
## Summary result of REML analysis has been saved in the file [ozhtPRS2.hsq].
##
## Analysis finished at 00:48:55 CEST on Wed Jun 15 2022
## Overall computational time: 11.14 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:48:55 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs3.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS3
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs3.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs3.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.7761 34.2641
## logL: -4806.95
## Running AI-REML algorithm ...
## Iter. logL V(G) V(e)
## 1 -4747.50 59.39239 17.64269
## 2 -4646.02 69.67637 11.07406
## 3 -4590.07 75.97320 8.58117
## 4 -4566.24 80.20843 7.42150
## 5 -4555.84 83.24244 6.78120
## 6 -4550.93 85.46101 6.39135
## 7 -4548.50 87.09011 6.13976
## 8 -4547.28 88.28472 5.97131
## 9 -4546.65 89.15818 5.85577
## 10 -4546.33 91.17320 5.60084
## 11 -4545.98 91.44246 5.57847
## 12 -4545.97 91.46926 5.57527
## 13 -4545.97 91.47254 5.57487
## Log-likelihood ratio converged.
##
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 68.88252
## logL: -4971.07558
## Running AI-REML algorithm ...
## Iter. logL V(e)
## 1 -4935.95 68.85209
## 2 -4935.95 68.83129
## 3 -4935.94 68.78631
## 4 -4935.94 68.78634
## 5 -4935.94 68.78634
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G) 91.472544 3.884525
## V(e) 5.574867 0.358533
## Vp 97.047411 3.841601
## V(G)/Vp 0.942555 0.004479
##
## Sampling variance/covariance of the estimates of variance components:
## 1.508953e+01 -2.300927e-01
## -2.300927e-01 1.285462e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.639132 8.743245
## X_2 -0.112417 0.022418
## X_3 -16.914727 484.031778
## X_4 417.248000 292.245842
## X_5 47.967643 188.946734
## X_6 -54.884823 179.008572
## X_7 2.898535 0.664494
## X_8 -9.535677 0.565604
##
## Summary result of REML analysis has been saved in the file [ozhtPRS3.hsq].
##
## Analysis finished at 00:49:06 CEST on Wed Jun 15 2022
## Overall computational time: 10.93 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:06 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs4.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS4
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs4.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs4.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.7703 34.2567
## logL: -4806.73
## Running AI-REML algorithm ...
## Iter. logL V(G) V(e)
## 1 -4747.20 59.39007 17.62675
## 2 -4645.59 69.66570 11.06386
## 3 -4589.65 75.95222 8.57561
## 4 -4565.86 80.17987 7.41804
## 5 -4555.48 83.20819 6.77879
## 6 -4550.58 85.42239 6.38956
## 7 -4548.16 87.04814 6.13836
## 8 -4546.94 88.24021 5.97017
## 9 -4546.31 89.11175 5.85481
## 10 -4545.99 91.12224 5.60027
## 11 -4545.65 91.39066 5.57795
## 12 -4545.64 91.41736 5.57477
## 13 -4545.64 91.42062 5.57436
## Log-likelihood ratio converged.
##
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 68.84960
## logL: -4970.79105
## Running AI-REML algorithm ...
## Iter. logL V(e)
## 1 -4935.55 68.81912
## 2 -4935.55 68.79829
## 3 -4935.55 68.75324
## 4 -4935.55 68.75327
## 5 -4935.55 68.75327
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G) 91.420622 3.882391
## V(e) 5.574364 0.358485
## Vp 96.994986 3.839481
## V(G)/Vp 0.942529 0.004480
##
## Sampling variance/covariance of the estimates of variance components:
## 1.507296e+01 -2.299303e-01
## -2.299303e-01 1.285118e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.212392 8.737739
## X_2 -0.112576 0.022412
## X_3 2.572887 483.807226
## X_4 412.614296 292.132395
## X_5 48.578056 188.894702
## X_6 -57.183480 178.965315
## X_7 2.801184 0.629326
## X_8 -9.528049 0.565490
##
## Summary result of REML analysis has been saved in the file [ozhtPRS4.hsq].
##
## Analysis finished at 00:49:17 CEST on Wed Jun 15 2022
## Overall computational time: 10.97 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:17 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs5.qcovar
## --covar ozht.covar
## --grm ozGRM
## --out ozhtPRS5
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs5.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs5.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 2 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 43.7455 34.2568
## logL: -4806.24
## Running AI-REML algorithm ...
## Iter. logL V(G) V(e)
## 1 -4746.69 59.33900 17.65774
## 2 -4645.51 69.62249 11.07936
## 3 -4589.55 75.91415 8.58302
## 4 -4565.71 80.14304 7.42229
## 5 -4555.30 83.17158 6.78155
## 6 -4550.39 85.38582 6.39149
## 7 -4547.97 87.01165 6.13978
## 8 -4546.74 88.20385 5.97127
## 9 -4546.12 89.07556 5.85569
## 10 -4545.79 91.08660 5.60067
## 11 -4545.45 91.35558 5.57830
## 12 -4545.44 91.38236 5.57510
## 13 -4545.44 91.38565 5.57470
## Log-likelihood ratio converged.
##
## Calculating the logLikelihood for the reduced model ...
## (variance component 1 is dropped from the model)
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 68.71671
## logL: -4969.43466
## Running AI-REML algorithm ...
## Iter. logL V(e)
## 1 -4933.73 68.68605
## 2 -4933.73 68.66510
## 3 -4933.73 68.61979
## 4 -4933.73 68.61982
## 5 -4933.73 68.61982
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G) 91.385646 3.881152
## V(e) 5.574698 0.358524
## Vp 96.960344 3.838207
## V(G)/Vp 0.942505 0.004483
##
## Sampling variance/covariance of the estimates of variance components:
## 1.506334e+01 -2.300251e-01
## -2.300251e-01 1.285397e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.446708 8.737619
## X_2 -0.113225 0.022407
## X_3 -9.622066 483.773546
## X_4 412.972238 292.081569
## X_5 48.675267 188.861598
## X_6 -57.801962 178.935760
## X_7 2.763790 0.613928
## X_8 -9.544225 0.565300
##
## Summary result of REML analysis has been saved in the file [ozhtPRS5.hsq].
##
## Analysis finished at 00:49:28 CEST on Wed Jun 15 2022
## Overall computational time: 10.91 sec.
# We can read in the gcta results
<- as.data.frame(read.table("gcta_results.txt"))
res names(res) <- c("Model", "beta", "se")
# To calculate R2 we scale beta into a marginal correlation
# and then square it We are calculating this and adding it
# to the res object
$r2 <- rbind((res[1, 2]/sd(all$ht, na.rm = T) * sd(all$PRS1,
resna.rm = T))^2, (res[2, 2]/sd(all$ht, na.rm = T) * sd(all$PRS2,
na.rm = T))^2, (res[3, 2]/sd(all$ht, na.rm = T) * sd(all$PRS3,
na.rm = T))^2, (res[4, 2]/sd(all$ht, na.rm = T) * sd(all$PRS4,
na.rm = T))^2, (res[5, 2]/sd(all$ht, na.rm = T) * sd(all$PRS5,
na.rm = T))^2)
# To test the significance of the betas we can use t-tests
# We are calculating this and adding it to the res object
$tStat <- res$beta/res$se # Follows a student distribution
res$pval <- (1 - pt(q = res$tStat, df = 1887, lower.tail = T)) *
res2
# Now we can look at the results
res
<- data.frame(name = c("5e-08", "1e-06", "0.0001", "0.01",
r2 "1"), R2 = c(1.236, 1.913, 2.218, 2.34, 2.561), P = c(0.000195,
5.04e-06, 6.91e-07, 2.67e-07, 6.12e-08))
# Increase bottom margin
par(mar = c(4, 4, 4, 4))
# Barplot jpeg('height.R2.jpg', width = 800, height = 800,
# quality=75, bg = 'white', type = 'cairo')
<- barplot(r2$R2, border = F, names.arg = r2$name, las = 2,
my_bar col = c("skyblue1", "skyblue2", "skyblue3", "skyblue4"),
ylim = c(0, 3), main = "Predicting height from height PRS",
ylab = "% variance explained", xlab = "PRS Thresholds")
# Add P values to the bars
text(my_bar, r2$R2 + 0.04, paste("P=", r2$P, sep = ""), cex = 1)
# dev.off()
# Next we'll plot height by quintile of PRS (often these
# plots are made with deciles but the sample is small so
# we're going to use quintiles here)
library(Hmisc)
library(gplots)
# the following command divides the data into quintiles.
$percentiles <- factor(cut2(longData$PRS5, g = 5, levels.mean = TRUE))
longData
# Plot the mean of height by precentile with 95% CIs
# jpeg('Quintile.jpg', width = 800, height = 800,
# quality=75, bg = 'white', type = 'cairo')
plotmeans(ht ~ percentiles, data = longData, xlab = "Quintile of PRS",
ylab = "Height in Cm", legends = c("1", "2", "3", "4", "5"))
# dev.off()
# We create this text file, which stores the name of the different GRM/CRM matrices so GCTA knows which to open
echo "ozGRM" > GRM-CRM.txt
echo "ozCRM" >> GRM-CRM.txt
head GRM-CRM.txt
./gcta64 --pheno ozht.phen --qcovar ozht.prs1.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS1.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs2.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS2.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs3.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS3.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs4.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS4.ACE --reml --reml-est-fix
./gcta64 --pheno ozht.phen --qcovar ozht.prs5.qcovar --covar ozht.covar --mgrm GRM-CRM.txt --out ozhtPRS5.ACE --reml --reml-est-fix
grep X_7 *ACE.log > gcta_results_ACE.txt
## ozGRM
## ozCRM
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:34 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs1.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS1.ACE
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs1.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs1.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.9405 29.1033 24.645
## logL: -4736.31
## Running AI-REML algorithm ...
## Iter. logL V(G1) V(G2) V(e)
## 1 -4700.32 50.36555 18.19224 13.55946
## 2 -4611.85 61.35311 13.83297 9.51921
## 3 -4573.17 67.87119 11.53647 7.88367
## 4 -4557.75 72.23652 10.14153 7.04864
## 5 -4550.93 75.29060 9.25897 6.56151
## 6 -4547.70 77.45579 8.69594 6.25505
## 7 -4546.10 78.99608 8.33749 6.05316
## 8 -4545.30 80.09254 8.11040 5.91617
## 9 -4544.89 82.56309 7.65749 5.61614
## 10 -4544.46 82.77717 7.74327 5.59209
## 11 -4544.46 82.82704 7.72411 5.58854
## 12 -4544.46 82.82629 7.72855 5.58805
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G1) 82.826286 5.203826
## V(G2) 7.728551 3.584645
## V(e) 5.588054 0.359747
## Vp 96.142891 3.815144
## V(G1)/Vp 0.861492 0.037840
## V(G2)/Vp 0.080386 0.037338
##
## Sum of V(G)/Vp 0.941878 0.004543
##
## Sampling variance/covariance of the estimates of variance components:
## 2.707981e+01 -1.251748e+01 -2.574890e-01
## -1.251748e+01 1.284968e+01 2.317447e-02
## -2.574890e-01 2.317447e-02 1.294179e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 169.255966 8.892221
## X_2 -0.112214 0.023057
## X_3 -49.312966 489.526725
## X_4 414.675175 296.962542
## X_5 39.556627 191.180832
## X_6 -31.085455 181.347441
## X_7 3.071223 0.804539
## X_8 -9.529359 0.582336
##
## Summary result of REML analysis has been saved in the file [ozhtPRS1.ACE.hsq].
##
## Analysis finished at 00:49:43 CEST on Wed Jun 15 2022
## Overall computational time: 9.18 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:43 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs2.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS2.ACE
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs2.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs2.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8995 29.0562 24.6377
## logL: -4733.47
## Running AI-REML algorithm ...
## Iter. logL V(G1) V(G2) V(e)
## 1 -4697.25 50.24276 18.04696 13.61573
## 2 -4609.52 61.21929 13.64557 9.56059
## 3 -4570.92 67.72897 11.33108 7.91190
## 4 -4555.47 72.08346 9.92817 7.06959
## 5 -4548.62 75.12825 9.04166 6.57814
## 6 -4545.34 77.28681 8.47639 6.26889
## 7 -4543.77 78.82292 8.11646 6.06511
## 8 -4542.96 79.91706 7.88831 5.92680
## 9 -4542.55 82.38431 7.43282 5.62374
## 10 -4542.11 82.60353 7.51772 5.59911
## 11 -4542.11 82.65447 7.49848 5.59543
## 12 -4542.11 82.65387 7.50293 5.59492
## 13 -4542.11 82.65550 7.50186 5.59486
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G1) 82.655495 5.190102
## V(G2) 7.501856 3.565254
## V(e) 5.594857 0.360340
## Vp 95.752208 3.801264
## V(G1)/Vp 0.863223 0.037804
## V(G2)/Vp 0.078347 0.037294
##
## Sum of V(G)/Vp 0.941569 0.004571
##
## Sampling variance/covariance of the estimates of variance components:
## 2.693716e+01 -1.242863e+01 -2.588074e-01
## -1.242863e+01 1.271104e+01 2.321948e-02
## -2.588074e-01 2.321948e-02 1.298450e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.624485 8.862613
## X_2 -0.110984 0.022994
## X_3 -17.724628 488.242119
## X_4 409.842478 296.195656
## X_5 43.235878 190.742260
## X_6 -55.042363 180.962099
## X_7 3.174626 0.717059
## X_8 -9.508999 0.580801
##
## Summary result of REML analysis has been saved in the file [ozhtPRS2.ACE.hsq].
##
## Analysis finished at 00:49:53 CEST on Wed Jun 15 2022
## Overall computational time: 10.07 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:49:53 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs3.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS3.ACE
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs3.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs3.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8736 29.0475 24.6513
## logL: -4732.9
## Running AI-REML algorithm ...
## Iter. logL V(G1) V(G2) V(e)
## 1 -4696.82 50.02688 18.17504 13.71829
## 2 -4610.66 61.02046 13.79117 9.62093
## 3 -4572.02 67.54188 11.49160 7.94854
## 4 -4556.43 71.90039 10.10100 7.09463
## 5 -4549.51 74.94802 9.22342 6.59669
## 6 -4546.23 77.10985 8.66428 6.28347
## 7 -4544.60 78.64964 8.30847 6.07709
## 8 -4543.78 79.74753 8.08303 5.93699
## 9 -4543.36 82.22597 7.63312 5.62996
## 10 -4542.92 82.45294 7.71726 5.60473
## 11 -4542.91 82.50486 7.69811 5.60091
## 12 -4542.91 82.50445 7.70257 5.60038
## 13 -4542.91 82.50611 7.70149 5.60032
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G1) 82.506111 5.184994
## V(G2) 7.701490 3.567178
## V(e) 5.600317 0.360871
## Vp 95.807918 3.804076
## V(G1)/Vp 0.861162 0.037797
## V(G2)/Vp 0.080385 0.037287
##
## Sum of V(G)/Vp 0.941546 0.004575
##
## Sampling variance/covariance of the estimates of variance components:
## 2.688416e+01 -1.239702e+01 -2.608549e-01
## -1.239702e+01 1.272476e+01 2.379687e-02
## -2.608549e-01 2.379687e-02 1.302279e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.920867 8.871820
## X_2 -0.110746 0.023018
## X_3 -52.818537 488.707817
## X_4 419.092591 296.477111
## X_5 36.334022 190.873399
## X_6 -44.340405 181.048200
## X_7 2.813803 0.661036
## X_8 -9.527184 0.581329
##
## Summary result of REML analysis has been saved in the file [ozhtPRS3.ACE.hsq].
##
## Analysis finished at 00:50:03 CEST on Wed Jun 15 2022
## Overall computational time: 9.54 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:50:03 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs4.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS4.ACE
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs4.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs4.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8714 29.0453 24.6475
## logL: -4732.7
## Running AI-REML algorithm ...
## Iter. logL V(G1) V(G2) V(e)
## 1 -4696.57 50.05982 18.14688 13.69875
## 2 -4610.19 61.03608 13.76834 9.61007
## 3 -4571.60 67.54205 11.47352 7.94229
## 4 -4556.05 71.88905 10.08655 7.09055
## 5 -4549.15 74.92784 9.21178 6.59380
## 6 -4545.87 77.08286 8.65476 6.28129
## 7 -4544.25 78.61747 8.30052 6.07537
## 8 -4543.44 79.71148 8.07621 5.93558
## 9 -4543.02 82.18079 7.62885 5.62923
## 10 -4542.58 82.40633 7.71308 5.60410
## 11 -4542.57 82.45812 7.69390 5.60030
## 12 -4542.57 82.45766 7.69839 5.59977
## 13 -4542.57 82.45932 7.69729 5.59971
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G1) 82.459325 5.181032
## V(G2) 7.697294 3.563947
## V(e) 5.599706 0.360812
## Vp 95.756325 3.801953
## V(G1)/Vp 0.861137 0.037783
## V(G2)/Vp 0.080384 0.037273
##
## Sum of V(G)/Vp 0.941521 0.004577
##
## Sampling variance/covariance of the estimates of variance components:
## 2.684309e+01 -1.237322e+01 -2.605583e-01
## -1.237322e+01 1.270172e+01 2.370034e-02
## -2.605583e-01 2.370034e-02 1.301853e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.476664 8.865994
## X_2 -0.110867 0.023012
## X_3 -35.580668 488.477468
## X_4 415.705484 296.370772
## X_5 36.669484 190.821852
## X_6 -47.537680 181.007689
## X_7 2.723517 0.626279
## X_8 -9.520307 0.581206
##
## Summary result of REML analysis has been saved in the file [ozhtPRS4.ACE.hsq].
##
## Analysis finished at 00:50:12 CEST on Wed Jun 15 2022
## Overall computational time: 8.75 sec.
## *******************************************************************
## * Genome-wide Complex Trait Analysis (GCTA)
## * version 1.94.0 beta Mac
## * (C) 2010-present, Jian Yang, The University of Queensland
## * Please report bugs to Jian Yang <jian.yang.qt@gmail.com>
## *******************************************************************
## Analysis started at 00:50:12 CEST on Wed Jun 15 2022.
## Hostname: imb17-013265-lt
##
## Accepted options:
## --pheno ozht.phen
## --qcovar ozht.prs5.qcovar
## --covar ozht.covar
## --mgrm GRM-CRM.txt
## --out ozhtPRS5.ACE
## --reml
## --reml-est-fix
##
## Note: This is a multi-thread program. You could specify the number of threads by the --thread-num option to speed up the computation if there are multiple processors in your machine.
##
## Reading phenotypes from [ozht.phen].
## Non-missing phenotypes of 1895 individuals are included from [ozht.phen].
## Reading quantitative covariate(s) from [ozht.prs5.qcovar].
## 6 quantitative covariate(s) of 1933 individuals are included from [ozht.prs5.qcovar].
## Reading discrete covariate(s) from [ozht.covar].
## 1 discrete covariate(s) of 1933 individuals are included from [ozht.covar].
##
## There are 2 GRM file names specified in the file [GRM-CRM.txt].
## Reading the GRM from the 1th file ...
## Reading IDs of the GRM from [ozGRM.grm.id].
## 1956 IDs are read from [ozGRM.grm.id].
## Reading the GRM from [ozGRM.grm.bin].
## GRM for 1956 individuals are included from [ozGRM.grm.bin].
## Reading the GRM from the 2th file ...
## Reading IDs of the GRM from [ozCRM.grm.id].
## 1956 IDs are read from [ozCRM.grm.id].
## Reading the GRM from [ozCRM.grm.bin].
## GRM for 1956 individuals are included from [ozCRM.grm.bin].
## 6 quantitative variable(s) included as covariate(s).
## 1 discrete variable(s) included as covariate(s).
## 1894 individuals are in common in these files.
##
## Performing REML analysis ... (Note: may take hours depending on sample size).
## 1894 observations, 8 fixed effect(s), and 3 variance component(s)(including residual variance).
## Calculating prior values of variance components by EM-REML ...
## Updated prior values: 29.8578 29.0401 24.6464
## logL: -4732.1
## Running AI-REML algorithm ...
## Iter. logL V(G1) V(G2) V(e)
## 1 -4695.94 49.99573 18.17732 13.71870
## 2 -4609.97 60.96663 13.80366 9.62128
## 3 -4571.39 67.46612 11.51264 7.94896
## 4 -4555.83 71.80684 10.12860 7.09504
## 5 -4548.92 74.84089 9.25582 6.59709
## 6 -4545.63 76.99269 8.70006 6.28385
## 7 -4544.02 78.52530 8.34655 6.07745
## 8 -4543.20 79.61813 8.12264 5.93734
## 9 -4542.78 82.08548 7.67591 5.63026
## 10 -4542.34 82.31263 7.75955 5.60502
## 11 -4542.33 82.36451 7.74045 5.60120
## 12 -4542.33 82.36411 7.74491 5.60067
## 13 -4542.33 82.36578 7.74383 5.60060
## Log-likelihood ratio converged.
##
## Summary result of REML analysis:
## Source Variance SE
## V(G1) 82.365776 5.177148
## V(G2) 7.743826 3.562831
## V(e) 5.600599 0.360904
## Vp 95.710201 3.800263
## V(G1)/Vp 0.860575 0.037789
## V(G2)/Vp 0.080909 0.037278
##
## Sum of V(G)/Vp 0.941484 0.004581
##
## Sampling variance/covariance of the estimates of variance components:
## 2.680287e+01 -1.235535e+01 -2.610650e-01
## -1.235535e+01 1.269376e+01 2.397526e-02
## -2.610650e-01 2.397526e-02 1.302519e-01
## Estimatesof fixed effects:
##
## Source Estimate SE
## mean 168.700024 8.866271
## X_2 -0.111501 0.023010
## X_3 -48.135784 488.464289
## X_4 416.564315 296.337132
## X_5 36.881111 190.792892
## X_6 -48.392397 180.984907
## X_7 2.694306 0.611060
## X_8 -9.536273 0.581096
##
## Summary result of REML analysis has been saved in the file [ozhtPRS5.ACE.hsq].
##
## Analysis finished at 00:50:21 CEST on Wed Jun 15 2022
## Overall computational time: 9.62 sec.
# We can read in the gcta results
<- as.data.frame(read.table("gcta_results_ACE.txt"))
res names(res) <- c("Model", "beta", "se")
# To calculate R2 we scale beta into a marginal correlation
# and then square it We are calculating this and adding it
# to the res object
$r2 <- rbind((res[1, 2]/sd(all$ht, na.rm = T) * sd(all$PRS1,
resna.rm = T))^2, (res[2, 2]/sd(all$ht, na.rm = T) * sd(all$PRS2,
na.rm = T))^2, (res[3, 2]/sd(all$ht, na.rm = T) * sd(all$PRS3,
na.rm = T))^2, (res[4, 2]/sd(all$ht, na.rm = T) * sd(all$PRS4,
na.rm = T))^2, (res[5, 2]/sd(all$ht, na.rm = T) * sd(all$PRS5,
na.rm = T))^2)
# To test the significance of the betas we can use t-tests
# We are calculating this and adding it to the res object
$tStat <- res$beta/res$se # Follows a student distribution
res$pval <- (1 - pt(q = res$tStat, df = 1887, lower.tail = T)) *
res2
# Now we can look at the results
res
A work by Sarah Medland & Baptiste Couvy-Duchesne