# ---------------------------------------------------------------------------------------------------------------------- # Program: happy2SATc.R # Author: Jose J. Morosoli # Date: 03 06 2020 # # # Twin Bivariate Saturated model to estimate causes of variation across multiple groups # Companion of debugging practical The Good Samaritan # Matrix style model - Raw data - Continuous data # -------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------| # PREPARE DATA library(OpenMx) library(psych) # Load Data goodsam <- read.table(file = "good_samaritan.txt", sep = "\t") # Quick look at the descriptives describe(goodsam) names(goodsam) # Reestructure data into family format famdata <- reshape(goodsam, direction = "wide", idvar = c("FAMID",'ZYG'), timevar = "TWID", sep = "_") # Check that it worked head(famdata) # Substitute missing values for non-sense value when the phenotype is also missing famdata$AGE_1[is.na(famdata$mmHg_1)==T&is.na(famdata$anger_1)==T] <- -999999 famdata$AGE_2[is.na(famdata$mmHg_2)==T&is.na(famdata$anger_2)==T] <- -999999 famdata$SEX_1[is.na(famdata$mmHg_1)==T&is.na(famdata$anger_1)==T] <- -999999 famdata$SEX_2[is.na(famdata$mmHg_2)==T&is.na(famdata$anger_2)==T] <- -999999 # Select Variables for Analysis vars <- c('mmHg','anger') # list of variables names nv <- 2 # number of variables ntv <- nv*2 # number of total variables selVars <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="_") covVars <- c('AGE_1','AGE_2','SEX_1','SEX_2') # Select Data for Analysis mzData <- subset(famdata, ZYG<=2, c(selVars, covVars)) dzData <- subset(famdata, ZYG>=3, c(selVars, covVars)) # Let's take a look to the variances var(mzData[,1:4], na.rm = T) var(dzData[,1:4], na.rm = T) # Apply mxData to data.frames so they are in a OpenMx friendly format dataMZ <- mxData( observed=mzData, type="raw" ) dataDZ <- mxData( observed=dzData, type="raw" ) # ------------ --------------------------------------------- # Compute starting values for the expected covariance matrix # This is basically building by hand the expected matrix of starting values for MZ and DZ svCvMZ <- c( var(mzData[,1], use = "complete.obs"), # Variance P1 (Twin 1) cov(mzData[,1],mzData[,2], use = "complete.obs" ), # Covariance P1-P2 (Twin 1) cov(mzData[,1],mzData[,3], use = "complete.obs"), # Covariance within P1 across twins cov(mzData[,1],mzData[,4], use = "complete.obs"), # Cross-twin (T1-T2), cross-trait (P1-P2) var(mzData[,2], use = "complete.obs"), # Variance P2 (Twin 1) *fixed to 1, original: var(mzmData$EQ5D_Anx_Bin1, use = "complete.obs") cov(mzData[,3],mzData[,2], use = "complete.obs"), # Cross-twin (T1-T2), cross-trait (P2-P1) cov(mzData[,2],mzData[,4], use = "complete.obs"), # Covariance within P2 across twins var(mzData[,3], use = "complete.obs"), # Variance P1 (Twin 2) cov(mzData[,3],mzData[,4], use = "complete.obs"), # Covariance P1-P2 (Twin 2) var(mzData[,4], use = "complete.obs")) # Variance P2 (Twin 2) *fixed to 1, original: var(mzmData$EQ5D_Anx_Bin1, use = "complete.obs") svCvMZ svCvDZ <- c( var(dzData[,1], use = "complete.obs"), # Variance P1 (Twin 1) cov(dzData[,1],dzData[,2], use = "complete.obs" ), # Covariance P1-P2 (Twin 1) cov(dzData[,1],dzData[,3], use = "complete.obs"), # Covariance within P1 across twins cov(dzData[,1],dzData[,4], use = "complete.obs"), # Cross-twin (T1-T2), cross-trait (P1-P2) var(dzData[,2], use = "complete.obs"), # Variance P2 (Twin 1) *fixed to 1, original: var(mzmData$EQ5D_Anx_Bin1, use = "complete.obs") cov(dzData[,3],dzData[,2], use = "complete.obs"), # Cross-twin (T1-T2), cross-trait (P2-P1) cov(dzData[,2],dzData[,4], use = "complete.obs"), # Covariance within P2 across twins var(dzData[,3], use = "complete.obs"), # Variance P1 (Twin 2) cov(dzData[,3],dzData[,4], use = "complete.obs"), # Covariance P1-P2 (Twin 2) var(dzData[,4], use = "complete.obs")) # Variance P2 (Twin 2) *fixed to 1, original: var(mzmData$EQ5D_Anx_Bin1, use = "complete.obs") svCvDZ # ------------ --------------------------------------------- # Create Labels for means labMeMZ <- paste("meanMZ",selVars,sep="_") labMeDZ <- paste("meanDZ",selVars,sep="_") # Create Labels for covariance matrices labCvMZ <- c("varP1T1_MZ", "covP1P2T1_MZ", "witrP1_MZ","crtwcrtrT1P1T2P2_MZ", "varP2T1_MZ", "crtwcrtrT1P2T2P1_MZ", "witrP2_MZ", "varP1T2_MZ", "covP1P2T2_MZ", "varP2T2_MZ") labCvDZ <- c("varP1T1_DZ", "covP1P2T1_DZ", "witrP1_DZ","crtwcrtrT1P1T2P2_DZ", "varP2T1_DZ", "crtwcrtrT1P2T2P1_DZ", "witrP2_DZ", "varP1T2_DZ", "covP1P2T2_DZ", "varP2T2_DZ") # Estimate Starting Values for means (based on raw data, you could find a more accurate starting value extracting the results from the regression of age*sex using lm() or similar) # These are only shortcuts for accessing values more quickly (svMeMZ <- c(mean(mzData[,1],na.rm=T),mean(mzData[,2],na.rm=T),mean(mzData[,3],na.rm=T),mean(mzData[,4],na.rm=T))) (svMeDZ <- c(mean(dzData[,1],na.rm=T),mean(dzData[,2],na.rm=T),mean(dzData[,3],na.rm=T),mean(dzData[,4],na.rm=T))) # Saturated Model ##################################################################################### # Store value of covariates in mxMatrices defAge1 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.AGE_1"), name="Age1" ) defAge2 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.AGE_2"), name="Age2" ) defSex1 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.SEX_1"), name="Sex1" ) defSex2 <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.SEX_2"), name="Sex2" ) # Matrices for regression coefficients # Beta of age for vD1con and vD3con pathB11 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.01, label="b11", name="beta11" ) pathB12 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.01, label="b12", name="beta12" ) # Beta of sex for vD1con and vD3con pathB21 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.5, label="b21", name="beta21" ) pathB22 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.5, label="b22", name="beta22" ) # Beta of age*sex interaction for vD1con and vD3con pathB31 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=-0.1, label="b31", name="beta31" ) pathB32 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=-0.1, label="b32", name="beta32" ) regres <- c(defAge1, defAge2, defSex1, defSex2, pathB11, pathB12, pathB21, pathB22, pathB31, pathB32) # Just grouping objects together so it's easier to call them # Matrix & Algebra for expected means vector meanMZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=c(TRUE,TRUE), labels=labMeMZ, values=svMeMZ, name="mMZ" ) meanDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=c(TRUE,TRUE), labels=labMeDZ, values=svMeDZ, name="mDZ" ) expMeanMZ <- mxAlgebra( mMZ + cbind(beta11*Age1,beta12*Age1,beta11*Age2,beta12*Age2) + cbind(beta21*Sex1,beta22*Sex1,beta21*Sex2,beta22*Sex2) + cbind(beta31*Age1*Sex1,beta32*Age1*Sex1,beta31*Age2*Sex2,beta32*Age2*Sex2), name="expMeanMZ") expMeanDZ <- mxAlgebra( mDZ + cbind(beta11*Age1,beta12*Age1,beta11*Age2,beta12*Age2) + cbind(beta21*Sex1,beta22*Sex1,beta21*Sex2,beta22*Sex2) + cbind(beta31*Age1*Sex1,beta32*Age1*Sex1,beta31*Age2*Sex2,beta32*Age2*Sex2), name="expMeanDZ") # Create matrices for expected covariance Matrices covMZ <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=c(T), values=svCvMZ, labels=labCvMZ, name="covMZ" ) covDZ <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=c(T), values=svCvDZ, labels=labCvDZ, name="covDZ" ) # Correlations matI <- mxMatrix( type="Iden", nrow=ntv, ncol=ntv, name="I" ) corMZ <- mxAlgebra( solve(sqrt(I*covMZ)) %&% covMZ, name="rMZ" ) corDZ <- mxAlgebra( solve(sqrt(I*covDZ)) %&% covDZ, name="rDZ" ) pars <- list( regres, matI ) # Just grouping objects together so it's easier to call them # Tell OpenMx what are our means and covariances expected model objMZ <- mxExpectationNormal( covariance="covMZ", means="expMeanMZ", dimnames=selVars ) objDZ <- mxExpectationNormal( covariance="covDZ", means="expMeanDZ", dimnames=selVars ) # Specify that we are gonna use Maximum Likelihood fitFunction <- mxFitFunctionML() # Remember that twin models are a SEM multigroup approach with 2 groups that must be identified within group: MZ and DZ modelMZ <- mxModel( pars, meanMZ, expMeanMZ, covMZ, corMZ, dataMZ, objMZ, fitFunction, name="MZ" ) modelDZ <- mxModel( pars, meanDZ, expMeanDZ, covDZ, corDZ, dataDZ, objDZ, fitFunction, name="DZ" ) minus2ll <- mxFitFunctionMultigroup(c('MZ','DZ')) # Create confidence interval objects ciCor <- mxCI( c('MZ.rMZ','DZ.rDZ')) # Build Saturated Model - this model includes the 2 subroups MZ and DZ model <- mxModel( "twoSATcon", modelMZ, modelDZ, minus2ll, ciCor) # ------------------------------------------------------------------------------ # RUN MODEL # Select optimizer and specific options for numerical precision of the integration of the multivariate normal # More info: http://openmx.psyc.virginia.edu/thread/3868 mxOption(NULL,"Default optimizer","NPSOL") # *Not statistically relevant, only for computational purposes* # Run Saturated Model # In order to reduce computation time, "intervals" argument is set to FALSE # If you want to estimate de CIs, you have to change FALSE for TRUE fit <- mxRun( model, intervals=F ) fit <- mxTryHard( model, intervals=F ) sum <- summary( fit ) sum # You can see here the saturated (freely estimated) MZ and DZ covariance matrices mxEval(MZ.covMZ, model = fit) mxEval(DZ.covDZ, model = fit) # EQUALITY OF MEANS, THRESHOLDS, VARIANCES AND COVARIANCES ACROSS TWIN ORDER # 1. Constrain expected Thresholds, Means and Variances (Continous variable) to be equal across twin order # OSDZ not included EqTwModel <- mxModel( fit, name = "EqTw") # Means and thresholds the same across twins EqTwModel <- omxSetParameters( EqTwModel, label=c(labMeMZ), newlabels=c("meanMZ_P1","meanMZ_P2")) EqTwModel <- omxSetParameters( EqTwModel, label=c(labMeDZ), newlabels=c("meanDZ_P1","meanDZ_P2")) # Variance of BMI the same across twins EqTwModel <- omxSetParameters( EqTwModel, label=c("varP1T1_MZ","varP1T2_MZ"), newlabels=c("varP1_MZ")) EqTwModel <- omxSetParameters( EqTwModel, label=c("varP1T1_DZ","varP1T2_DZ"), newlabels=c("varP1_DZ")) # Variance of SSI the same across twins EqTwModel <- omxSetParameters( EqTwModel, label=c("varP2T1_MZ","varP2T2_MZ"), newlabels=c("varP2_MZ")) EqTwModel <- omxSetParameters( EqTwModel, label=c("varP2T1_DZ","varP2T2_DZ"), newlabels=c("varP2_DZ")) # Covariance of P1 and P2 the same across twins EqTwModel <- omxSetParameters( EqTwModel, label=c("covP1P2T1_MZ","covP1P2T2_MZ"), newlabels=c("covP1P2_MZ")) EqTwModel <- omxSetParameters( EqTwModel, label=c("covP1P2T1_DZ","covP1P2T2_DZ"), newlabels=c("covP1P2_DZ")) # Cross-twin Cross-Trait the same across twins EqTwModel <- omxSetParameters( EqTwModel, label=c("crtwcrtrT1P2T2P1_MZ","crtwcrtrT1P1T2P2_MZ"), newlabels=c("crtwcrtr_MZ")) EqTwModel <- omxSetParameters( EqTwModel, label=c("crtwcrtrT1P2T2P1_DZ","crtwcrtrT1P1T2P2_DZ"), newlabels=c("crtwcrtr_DZ")) EqTwModel <- omxAssignFirstParameters( EqTwModel ) EqTwFit <- mxRun( EqTwModel ) # Always check if the parametres make sense summary(EqTwFit) summary(EqTwFit)$statusCode[1] # Check if equating Twin 1 with Twin 2 leads to a significant worsening of the fit mxCompare(fit, EqTwFit) # Nope it does not # EQUALITY OF MEANS, THRESHOLDS, VARIANCES AND COVARIANCES ACROSS ZYGOSITY # 2. Constrain expected Thresholds, Means and Variances (Continuous variable) to be equal across twin order and zygosity # OSDZ not included EqZygModel <- mxModel( EqTwFit, name="EqZyg" ) # Means the same across zygosity groups EqZygModel <- omxSetParameters( EqZygModel, label=c("meanMZ_P1","meanDZ_P1"), newlabels=c("meanZ_P1")) EqZygModel <- omxSetParameters( EqZygModel, label=c("meanMZ_P2","meanDZ_P2"), newlabels=c("meanZ_P2")) # Variance the same across zygosity groups EqZygModel <- omxSetParameters( EqZygModel, label=c("varP1_MZ","varP1_DZ"), newlabels=c("varP1_Z")) EqZygModel <- omxSetParameters( EqZygModel, label=c("varP2_MZ","varP2_DZ"), newlabels=c("varP2_Z")) # Covariance of BMI and SSI the same across twins EqZygModel <- omxSetParameters( EqZygModel, label=c("covP1P2_MZ","covP1P2_DZ"), newlabels=c("covP1P2_Z")) # Fix issues with having different starting values for the same label EqZygModel <- omxAssignFirstParameters( EqZygModel ) # Run! EqZygFit <- mxRun( EqZygModel, intervals=F) #EqZygFit <- mxTryHard( EqZygModel, intervals=F, exhaustive=F, extraTries=20) # Check estimates (have we equated what we wanted?) and status summary(EqZygFit) summary(EqTwFit)$statusCode[1] # Check if equating zygosities leads to a significant worsening of the fit mxCompare(EqTwFit, EqZygFit) # Nope, all good! # Here we can check how the covariance matrices look after testing if we can assume that twins and zygosities behave similarly mxEval(MZ.covMZ, model = EqZygFit) mxEval(DZ.covDZ, model = EqZygFit) mxEval(MZ.rMZ, EqZygFit) mxEval(DZ.rDZ, EqZygFit) # Extract correlations from last step of assumption testing # They will tell us what will happen with our ACE model # Compute the last model but now with intervals EqZygFit_Ci <- mxRun( EqZygFit, intervals = T ) SumCis <- summary(EqZygFit_Ci) # Computing CIs in parallel # WARNING! Do not use in clusters or high performance computers. Only in your laptop or after consulting with your IT department. library(snowfall) library(parallel) #require(parallel (ncores <- detectCores()) # How many cores sfInit(parallel=TRUE, cpus = ncores) sfLibrary(OpenMx) EqZygFit_Ci <- omxParallelCI( EqZygFit ) SumCis <- summary(EqZygFit_Ci) # Here, key information extracted from the summary object # Phenotipic correlation # Var1 - Var2 SumCis$CI[2,] SumCis$CI[18,] # Within trait correlations # MZ # # Var1 SumCis$CI[3,] # Var2 SumCis$CI[8,] # DZ # # Var1 SumCis$CI[19,] # Var2 SumCis$CI[24,] # XtwinXtrait correlations # # MZ SumCis$CI[4,] # DZ SumCis$CI[20,]