# ---------------------------------------------------------------------------------------------------------------------- # Program: happy2ACEc.R # Author: Jose J. Morosoli # Date: 03 06 2020 # # # VARIANCE COMPONENT APPROACH # # Twin Bivariate ACE model to estimate causes of variation across multiple groups # Version for debugging practical *FIXED version with solutions* # Matrix style model - Raw data - Continuous data # -------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------| # PREPARE DATA library(OpenMx) library(psych) # Load Data goodsam <- read.table(file = "good_samaritan.txt", sep = "\t") #----CODING----# # SEX: 0=MALES, 1=FEMALES # ZYG: 1=MZ Female, 2=MZ Male, 3=DZ Female, 4=DZ Male # Quick look at the descriptives describe(goodsam) names(goodsam) # Reestructure data into family format famdata <- reshape(goodsam, direction = "wide", idvar = c("FAMID",'ZYG'), timevar = "TWID", sep = "_") # Check that it worked head(famdata) # Is there missing data in the covariates sum(is.na(famdata$AGE_1)==T) sum(is.na(famdata$AGE_2)==T) sum(is.na(famdata$SEX_1)==T) sum(is.na(famdata$SEX_2)==T) # Substitute missing values for non-sense value when the phenotype is also missing famdata$AGE_1[is.na(famdata$mmHg_1)==T&is.na(famdata$anger_1)==T] <- -999999 famdata$AGE_2[is.na(famdata$mmHg_2)==T&is.na(famdata$anger_2)==T] <- -999999 famdata$SEX_1[is.na(famdata$mmHg_1)==T&is.na(famdata$anger_1)==T] <- -999999 famdata$SEX_2[is.na(famdata$mmHg_2)==T&is.na(famdata$anger_2)==T] <- -999999 # Select Variables for Analysis vars <- c('mmHg','anger') # list of variables names nv <- 2 # number of variables ntv <- nv*2 # number of total variables selVars <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="_") covVars <- c('AGE_1','AGE_2','SEX_1','SEX_2') # Select Data for Analysis mzData <- subset(famdata, ZYG<=2, c(selVars, covVars)) dzData <- subset(famdata, ZYG>=3, c(selVars, covVars)) # Apply mxData to data.frames so they are in a OpenMx friendly format dataMZ <- mxData( observed=mzData, type="raw" ) dataDZ <- mxData( observed=dzData, type="raw" ) # Create New Labels for means labMe <- paste("mean",vars,sep="_") # ---------------------------------------------------------------------------------------------------------------------- # PREPARE MODEL # Store value of covariates in mxMatrices defAge1 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.AGE_1"), name="Age1" ) defAge2 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.AGE_2"), name="Age2" ) defSex1 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.SEX_1"), name="Sex1" ) defSex2 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.SEX_2"), name="Sex2" ) # Matrices for regression coefficients # Beta of age for vD1con and vD3con pathB11 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.01, label="b11", name="beta11" ) pathB12 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.01, label="b12", name="beta12" ) # Beta of sex for vD1con and vD3con pathB21 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=1, label="b21", name="beta21" ) pathB22 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=1, label="b22", name="beta22" ) # Beta of age*sex interaction for vD1con and vD3con pathB31 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=-0.5, label="b31", name="beta31" ) pathB32 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=-0.5, label="b32", name="beta32" ) # Matrix & Algebra for expected means vector meanG <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=c(TRUE,TRUE), labels=labMe, values=c(120,5), name="mZ" ) expMean <- mxAlgebra( mZ + cbind(beta11*Age1,beta12*Age1,beta11*Age2,beta12*Age2) + cbind(beta21*Sex1,beta22*Sex1,beta21*Sex2,beta22*Sex2) + cbind(beta31*Age1*Sex1,beta32*Age1*Sex1,beta31*Age2*Sex2,beta32*Age2*Sex2), name="expM") pars <- list(pathB11, pathB21, pathB31, pathB12, pathB22, pathB32, meanG, expMean) defs <- list(defAge1, defAge2, defSex1, defSex2) # Just grouping objects together so it's easier to call them ################## EXPECTED COVARIANCES - VERSION 1 ######################## # STANDARD MATRIX VERSION: EASY TO MODIFY TO INCLUDE MORE VARIABLES # Create Matrices for Variance Components #covA <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), labels=labLower("VA",nv), name="VA" ) #covC <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), labels=labLower("VC",nv), name="VC" ) #covE <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPe,nv), labels=labLower("VE",nv), name="VE" ) # Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins #covP <- mxAlgebra( expression= VA+VC+VE, name="V" ) #covMZ <- mxAlgebra( expression= VA+VC, name="cMZ" ) #covDZ <- mxAlgebra( expression= 0.5%x%VA+ VC, name="cDZ" ) #expCovMZ <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" ) #expCovDZ <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" ) #varComps <- list( covA, covC, covE, covP ) #################### EXPECTED COVARIANCES - VERSION 2 ##################### # FOR EDUCATIONAL PURPOSES< WE ARE GONNA HARDCODE OUR 9 PARAMETERS SO YOU CAN SEE HOW THEY REALLY LOOK # Within variable 1 parameters compA11 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 5, labels = "A11", name = "VA11") compC11 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = -1, labels = "C11", name = "VC11") compE11 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 3, labels = "E11", name = "VE11") # Within variable 2 parameters compA22 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 1, labels = "A22", name = "VA22") compC22 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 1, labels = "C22", name = "VC22") compE22 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 2, labels = "E22", name = "VE22") # Across variables 1 and 2 parameters compA12 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 1, labels = "A12", name = "VA12") compC12 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = -1, labels = "C12", name = "VC12") compE12 <- mxMatrix( type = "Full", nrow=1, ncol=1, free = T, values = 1, labels = "E12", name = "VE12") # Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins expCovMZ <- mxAlgebra( expression= rbind( cbind(VA11+VC11+VE11,VA12+VC12+VE12,VA11+VC11,VA12+VC12), cbind(VA12+VC12+VE12,VA22+VC22+VE22,VA12+VC12,VA22+VC22), cbind(VA11+VC11,VA12+VC12,VA11+VC11+VE11,VA12+VC12+VE12), cbind(VA12+VC12,VA22+VC22,VA12+VC12+VE12,VA22+VC22+VE22)), name="expCovMZ" ) expCovDZ <- mxAlgebra( expression= rbind( cbind(VA11+VC11+VE11,VA12+VC12+VE12,0.5*VA11+VC11,0.5*VA12+VC12), cbind(VA12+VC12+VE12,VA22+VC22+VE22,0.5*VA12+VC12,0.5*VA22+VC22), cbind(0.5*VA11+VC11,0.5*VA12+VC12,VA11+VC11+VE11,VA12+VC12+VE12), cbind(0.5*VA12+VC12,0.5*VA22+VC22,VA12+VC12+VE12,VA22+VC22+VE22)), name="expCovDZ" ) covA <- mxAlgebra( expression= rbind(cbind(VA11,VA12), cbind(VA12,VA22)), name="VA" ) covC <- mxAlgebra( expression= rbind(cbind(VC11,VC12), cbind(VC12,VC22)), name="VC" ) covE <- mxAlgebra( expression= rbind(cbind(VE11,VE12), cbind(VE12,VE22)), name="VE" ) covP <- mxAlgebra( expression= rbind(cbind(VA11+VC11+VE11,VA12+VC12+VE12), cbind(VA12+VC12+VE12,VA22+VC22+VE22)), name="V" ) varComps <- list(compA11,compC11,compE11,compA22,compC22,compE22,compA12,compC12,compE12,covA,covC,covE,covP) ################################################################ # And the rest of the code is putting models together and telling OpenMx where to look for objects # Create Expectation Objects for Multiple Groups expMZ <- mxExpectationNormal( covariance="expCovMZ", means="expM", dimnames=selVars ) expDZ <- mxExpectationNormal( covariance="expCovDZ", means="expM", dimnames=selVars ) funML <- mxFitFunctionML() # Apply mxData to data.frames so they are in a OpenMx friendly format dataMZ <- mxData( observed=mzData, type="raw" ) dataDZ <- mxData( observed=dzData, type="raw" ) # Put everything together in models # modelMZ <- mxModel( pars, defs, varComps, expCovMZ, dataMZ, expMZ, funML, name="MZ" ) modelDZ <- mxModel( pars, defs, varComps, expCovDZ, dataDZ, expDZ, funML, name="DZ" ) multi <- mxFitFunctionMultigroup( c("MZ","DZ") ) # Calculate genetic and environmental correlations # Create Algebra for Standardization matI <- mxMatrix( type="Iden", nrow=nv, ncol=nv, name="I") corA <- mxAlgebra( expression=solve(sqrt(I*VA))%&%VA, name ="rA" ) #operation equal to cov2cor() corC <- mxAlgebra( expression=solve(sqrt(I*VC))%&%VC, name ="rC" ) corE <- mxAlgebra( expression=solve(sqrt(I*VE))%&%VE, name ="rE" ) corP <- mxAlgebra (expression=solve(sqrt(I*V)) %*% V %*% solve(sqrt(I*V)), name="rP") # This sould be same as the one estimate from the saturated model and very similar to the one using just cor() # Create Algebra for Variance Components + % variance explained # rowUV <- rep('UV',nv) colUV <- rep(c('VA','VC','VE'),each=nv) estUV <- mxAlgebra( expression=cbind(VA,VC,VE), name="UV", dimnames=list(rowUV,colUV) ) rowSV <- rep('SV',nv) colSV <- rep(c('SA','SC','SE'),each=nv) estSV <- mxAlgebra( expression=cbind(VA/V,VC/V,VE/V), name="SV", dimnames=list(rowSV,colSV) ) # Create Confidence Interval Objects ciUV <- mxCI( c("UV","SV","rA","rC","rE","rP") ) # Build Model with Confidence Intervals calc <- list( matI, corA, corC, corE, corP,estUV, estSV, ciUV ) modelACE <- mxModel( "twoACEvc", varComps, modelMZ, modelDZ, multi, calc ) # ---------------------------------------------------------------------------------------------------------------------- # RUN MODEL mxOption(NULL,"Default optimizer","NPSOL") # this line changes the optimizing algorithm used by OpenMx, we'll talk more about in our magical session # Run ACE Model fitACE <- mxRun( modelACE, intervals=F ) sumACE <- summary( fitACE ) sumACE # Output: unstandardized solution mxEval(UV, fitACE) # Output: standardized solution mxEval(SV, fitACE) #### QUIZ ##### # Is the phenotypic correlation across traits significant? # Are we getting a similar phenotypic correlation within OpenMx to the one we estimated earlier? # Phenotipic correlation # cor(goodsam$mmHg,goodsam$anger) mxEval(rP, fitACE) # ---> we drop all covariance components ACE_noCov <- mxModel(fitACE, name = "ACE noCov") ACE_noCov <- omxSetParameters(ACE_noCov, labels = c("A12","C12","E12"), free = F, values = 0) fitACE_noCov <- mxRun(ACE_noCov) summary(fitACE_noCov) # TEST HYPOTHESIS by COMPARING new model with full ACE model mxCompare(fitACE,fitACE_noCov) # Can we assume that a no covariance model explain our data as well as one that allows the two phenotypes to be correlated? # Answer: No, we cannot ################################################################ ########### COMPUTING CIs WITH SNOWFALL / PARALLEL ############ ################################################################ # Computing CIs in parallel # WARNING! Do not use in clusters or high performance computers. Only in your laptop or after consulting with your IT department. library(snowfall) library(parallel) #require(parallel (ncores <- detectCores()) # How many cores sfInit(parallel=TRUE, cpus = ncores) sfLibrary(OpenMx) fitACE_Ci <- omxParallelCI( fitACE ) myCIs <- summary(fitACE_Ci) # Check CIs first summary(fitACE_Ci) summary(fitACE_Ci)$CIcodes summary(fitACE_Ci)$CI # Confidence intervals for correlations summary(fitACE_CIr) summary(fitACE_CIr)$CIcodes summary(fitACE_CIr)$CI # NOTE: mxEval() is a function we use to extract values from the fitted model, we need the NAME of the matrix and the name of the fitted model # Standardized H2, C2, E2 estimates for each trait (univariate estimates) mxEval(UV, fitACE_Ci) # And with confidence intervals # myCIs$CI[grep(pattern = "SV",row.names(myCIs$CI)),] #### Unstandardized values ###### myCIs$CI[grep(pattern = "UV",row.names(myCIs$CI)),] # A for trait 1 # mxEval(VA11, fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(VA11, fitACE_Ci),myCIs$CI$estimate),] # C for trat 1 # mxEval(VC11, fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(VC11, fitACE_Ci),myCIs$CI$estimate),] # E for trait 1 # mxEval(VE11, fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(VE11, fitACE_Ci),myCIs$CI$estimate),] # A for trat 2 # mxEval(VA22, fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(VA22, fitACE_Ci),myCIs$CI$estimate),] # VC for trait 2 # mxEval(VC22, fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(VC22, fitACE_Ci),myCIs$CI$estimate),] # VE for trait 2 # mxEval(VE22, fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(VE22, fitACE_Ci),myCIs$CI$estimate),] # rA # mxEval(rA, fitACE_CIr) # And now with its confidence interval summary(fitACE_CIr)$CI[grep(pattern = mxEval(rA, fitACE_CIr)[2,1],summary(fitACE_CIr)$CI$estimate),] # rC # mxEval(rC, fitACE_CIr) # And now with its confidence interval summary(fitACE_CIr)$CI[grep(pattern = mxEval(rC, fitACE_CIr)[2,1],summary(fitACE_CIr)$CI$estimate),] # rE # mxEval(rE, fitACE_CIr) # And now with its confidence interval summary(fitACE_CIr)$CI[grep(pattern = mxEval(rE, fitACE_CIr)[2,1],summary(fitACE_CIr)$CI$estimate),] # rP # Good sanity check mxEval(rP, fitACE_CIr) # And now with its confidence interval summary(fitACE_CIr)$CI[grep(pattern = mxEval(rP, fitACE_CIr)[2,1],summary(fitACE_CIr)$CI$estimate),] # Var1 - Var2 # This one is coming from the saturated model SumCis$CI[2,] SumCis$CI[18,] # prop% phenotypic covariance due to A # mxEval(SV[1,2], fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(SV[1,2], fitACE_Ci),myCIs$CI$estimate),] # prop% phenotypic covariance due to C # mxEval(SV[1,4], fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(SV[1,4], fitACE_Ci),myCIs$CI$estimate),] # prop% phenotypic covariance due to E # mxEval(SV[1,6], fitACE_Ci) # And now with its confidence interval myCIs$CI[grep(pattern = mxEval(SV[1,6], fitACE_Ci),myCIs$CI$estimate),] # It should add up to 1 mxEval(SV[1,2]+SV[1,4]+SV[1,6], fitACE_Ci)