# Copyright 2019-2020 by Robert M. Kirkpatrick # Licensed under CC BY 4.0 # This is an adaptation of a script previously used at the 2017 AGES Workshop at Virginia Commonwealth University. #This script demonstrates the use of the GREML feature in a simple but realistic example. #It first loads a genomic-relatedness matrix (GRM) and a file containing the phenotype and a covariate. Then, it #fits a simple GREML model to estimate additive-genetic variance, residual variance, and heritability. #The GRM is 1000x1000, and was computed from 50,000 simulated SNPs in linkage equilibrium. #Note that this analysis is simple enough that it could be done in GCTA (and GCTA would do it more quickly). require(OpenMx) #We'll use SLSQP: mxOption(NULL,"Default optimizer","SLSQP") #More threads means faster performance but larger memory footprint #(however, this script cannot use more than 2 threads): mxOption(NULL,"Number of threads",2) options(mxCondenseMatrixSlots=TRUE) #<--Saves memory #You need to set R's working directory to the directory containing the data files for this demo. #(i.e., YOU MUST CHANGE THE NEXT LINE TO REFLECT WHERE, IN YOUR HOME DIRECTORY, YOU'VE PLACED THE DATA FILES): setwd("/faculty/kirkpatrick/2020/GREML/data/") #Total number of participants: N <- 1000 # Load and check data: ################################################################## #Load the GRM's data. Argument 'prefix' to ReadGRMBin() should be everything in the filename and path of one of the GRM's #that precedes the ".grm" : grmstuff <- omxReadGRMBin(prefix="AGES2017GRM",returnList=T) closeAllConnections() str(grmstuff) #^^^grmstuff will be a list of 4 elements: # $diag is a numeric vector containing the GRM's diagonal elements. # $off is a numeric vector containing the GRM's off-diagonal elements. # $id is a dataframe that contains the family and individual IDs orresponding to the rows and columns of the GRM. # $N is the number of markers used to compute the GRM. #Use the elements of grmstuff to make a proper GRM: #Initialize: GRM <- matrix( 0.0,nrow=N,ncol=N, dimnames=list(grmstuff$id$V1+grmstuff$id$V2,grmstuff$id$V1+grmstuff$id$V2)) #^^^We're giving the GRM dimnames based on IDs. #Populate the upper triangle of the GRM, excluding diagonal, with grmstuff$off: GRM[!lower.tri(GRM,diag=T)] <- grmstuff$off #Populate the lower triangle, excluding diagonal, of the GRM: GRM <- GRM + t(GRM) #Populate the diagonal of the GRM: diag(GRM) <- grmstuff$diag #Look at upper left corner of GRM: GRM[1:5,1:5] #Load phenotype file: phenfile <- read.csv("GREMLphenodata.csv",header=T) #The columns are family ID, individual ID, phenotype, and covariate: head(phenfile) #There is one missing value for the phenotype and for the covariate: sum(is.na(phenfile$y)) sum(is.na(phenfile$x)) #Give phenfile rownames based on IDs: rownames(phenfile) <- phenfile$famid + phenfile$id #Check to make sure that there are no individuals represented in the GRM but not in the phenotype file (there won't #be, since the data are simulated, but this is a good check to make when working with real data): all(rownames(GRM) %in% rownames(phenfile)) #<--Should return TRUE. #And check vice versa: all(rownames(phenfile) %in% rownames(GRM)) #<--Should also be TRUE. #In fact, the rows of phenfile are matched with those of the GRM: all(rownames(phenfile)==rownames(GRM)) #<--TRUE. # Create MxData object & expectation; get empirical start values: ########################################## #The MxData object. We will use gremldat$yX as the raw dataset to be used in our MxModel: mxdat <- mxData(observed=cbind(y=phenfile$y,x=phenfile$x), type="raw", sort=FALSE) #^^^N.B. the 'sort=FALSE' argument! I CANNOT EMPHASIZE ENOUGH HOW IMPORTANT THAT IS!!! By default, OpenMx #automatically sorts the rows of a raw dataset at runtime, to try to achieve a perfomance boost. However, we #currently have the rows of y and X sorted exactly the same as in the GRM, so we do not want OpenMx to do any #sorting! #We tell the GREML espectation that the name of the model-expected covariance matrix will be "V", #that the one phenotype has column label 'y', that the one covariate has column label 'x', #and that a lead column of ones needs to be appended to the 'X' matrix (for the intercept): ge <- mxExpectationGREML(V="V",yvars="y", Xvars="x", addOnes=T) #Let's get the residual variance from an OLS regression of the phenotype onto the covariate. We'll use half #this residual variance as the start value for the additive-genetic and nonshared-environmental variance #components: orv <- var(lm(y~x, data=phenfile)$residuals) # Create the MxModel, and run it: ############################################################################### #(We will create a lot of objects inside the mxModel() statement. This helps to save memory.) testmod <- mxModel( "GREML_1GRM_1trait", #<--Model name #1x1 matrix containing nonshared-environmental (residual) variance component: mxMatrix(type = "Full", nrow = 1, ncol=1, free=T, values = orv/2, labels = "ve", lbound = 0.0001, name = "Ve"), #1x1 matrix containing additive-genetic variance component: mxMatrix(type = "Full", nrow = 1, ncol=1, free=T, values = orv/2, labels = "va", name = "Va"), #1000x1000 identity matrix--the "relatedness matrix" for the residuals: mxMatrix("Iden",nrow=N,name="I"), #The GRM: mxMatrix("Symm",nrow=N,free=F,values=GRM,name="A"), #The model-expected covariance matrix: mxAlgebra((A%x%Va) + (I%x%Ve), name="V"), #<--Readable, but could be made more efficient. #An MxAlgebra for the heritability: mxAlgebra(Va/(Va+Ve), name="h2"), mxdat, #<--MxData object ge, #<--GREML expectation mxFitFunctionGREML() #<--GREML fitfunction ) #Run and view summary: testrun <- mxRun(testmod) summary(testrun) mxEval(h2,testrun,T) mxSE(h2,testrun)