##### The purpose of this script is to perform TrioGCTA a variance decomposition method using genotyped # mother, father, offspring trios, developed by Espen Eilertsen (see https://github.com/espenmei/trio) #If you have any questions, please feel free to email Espen Eilertsen e.m.eilertsen@psykologi.uio.no; Ziada Ayorech #ziada.ayorech@psykologi.uio.no, and/ or Perline Demange perlined@psykologi.uio.no #Further details on trio GCTA are available in the manuscript PMID: 33387132 DOI: 10.1007/s10519-020-10036-6d ##### ########################################################################################################################### ##### Day 4 Practical: Trio GCTA # This practical includes questions which you can answer at Qualtrics -- we will discuss the interpretations when you are done # Qualtrics link: https://qimr.az1.qualtrics.com/jfe/form/SV_bjXq3wCxYVR0SEu ########################################################################################################################### ######################################### # STEP O: copy files over and set working directory ######################################### rm(list = ls()) #getwd() #check you are where you think you are: you should be here: /home/yourname/2024/Day-4/TrioGCTA_practical/input #setwd("/home/your_name/2024/day4_triogcta/TrioGCTA_practical/input") # if you need to change your WD change 'your_name' in this file path to your name ######################################### # STEP 1: Load packages and read in data ######################################### library(genio) # For reading plink format library(OpenMx) # For fitting your model library(gaston) # For computing your GRM (genetic relatedness matrix) #Read in simulated data dat = read.bed.matrix("simtrio") ################################################## # STEP 2: Compute GRM (genetic relatedness matrix) # ################################################## A = GRM(dat) #create a genetic relatedness matrix which is a 3000X3000 matrix including mother, father, offspring ################################################## # STEP 3: Subset GRM into blocks ################################################## mid = X:X # rather than X, place the correct range of values pid = X:X # rather than X, place the correct range of values oid = X:X # rather than X, place the correct range of values Amm = A[mid, mid] App = A[pid, pid] Aoo = A[oid, oid] Dpm = A[pid, mid] + A[mid, pid] Dom = A[oid, mid] + A[mid, oid] Dop = A[oid, pid] + A[pid, oid] # These blocks define the correlation structure of each family members effects i.e., direct indirect covariance # The implied structure of the model can be described by these blocks alone # NB!! this GRM becomes computationally intensive when working with larger data sets ., 60,000 trios # in that case we would recommend using the programming language Julia which is known for its speed in high performance computing # Contact Espen Eilertsen e.m.eilertsen@psykologi.uio.no for Julia scripts ################################################## # STEP 4: Define the trio model in open mx ################################################## # set up model yX = cbind(dat@ped$pheno[oid], 1) # create a matrix including phenotype data and a column of ones colnames(yX) = c("y", "x") K = 1000 # Number of trios #define the structural equation model in OpenMX modmx = mxModel("trio",# this creates a new structural equation model called 'trio' mxMatrix("Lo", 3, 3, T, sqrt(diag(var(yX[, 1]) / 4, 3)),# creates a lower triangular matrix (Lo) with three rows and three columns, the diagonal is set as the square root of 1/4th of the variance of the 1st column c("l11", "l21", "l31", "l22", "l32", "l33"), name = "L"), # Cholesky factor of genetic covariance matrix mxAlgebra(L %*% t(L), name = "Sg"), mxMatrix("Fu", 1, 1, T, var(yX[, 1]) / 4, "se", name = "Se"), # creates 1X1 matrix (Fu), representing the residual variance (Se) [Residual variance] mxMatrix("Sy", K, K, F, Amm, name = "Amm"),# Matrix for mother's direct genetic effects mxMatrix("Sy", K, K, F, App, name = "App"),# Matrix for father's indirect genetic effects mxMatrix("Sy", K, K, F, Aoo, name = "Aoo"),# Matrix for offspring's indirect genetic effects mxMatrix("Sy", K, K, F, Dpm, name = "Dpm"),# Matrix for covariance between mother direct and father indirect mxMatrix("Sy", K, K, F, Dom, name = "Dom"),# Matrix for covariance between mother direct and child indirect mxMatrix("Sy", K, K, F, Dop, name = "Dop"),# Matrix for covariance between child indirect and father indirect mxMatrix("Id", K, K, name = "I"),# Identity matrix - a square matrix where the diagonal is 1 and the off diagonal is 0 -- multiplying a given matrix by an identity matrix, leaves the given matrix unchanged mxData(yX, "raw", sort = F),# raw phenotype data # Model implied covariance mxAlgebra(Sg[1, 1] * Amm + Sg[2, 2] * App + Sg[3, 3] * Aoo + Sg[2, 1] * Dpm + Sg[3, 1] * Dom + Sg[3, 2] * Dop + se * I, name = "V"),# computes the sum of products of the matrices i.e., Implied covariance matrix mxExpectationGREML("V", dataset.is.yX = T), # specifies the GREML method for parameter estimation mxFitFunctionGREML()) ################################################## # STEP 5: Fit the model ################################################## modmx = mxRun(modmx) # OpenMX will try to find parameter estimates that maximise the fit of the observed data based on the specified constraints ################################################## # STEP 6: Pull out results ################################################## mxEval(Sg, modmx) # Genetic (co)variances where diagonal represents variance and off diagonal represents covariance mxEval(Se, modmx) # residual variance ################################################## # Bonus STEP 7: How would you amend this script if # you wanted mother as the focal individual ################################################## ################################################## # Bonus STEP 8: Amend the script to have father as #the focal individual. Does it run? If not..why? ##################################################