# Multiple-indicator DOC Model # MadhurBain Singh, Nathan Gillespie, Hermine H. Maes # March 8, 2024 ## Qualtrics: # https://qimr.az1.qualtrics.com/jfe/form/SV_eW1CeUT3ZibFMzQ library(OpenMx) # mxOption(NULL, 'Number of Threads', parallel::detectCores()) mxOption(NULL, "Default optimizer", "CSOLNP") mxVersion() # Select Data ============================================================== dataFile <- "~/doc/Data/ACE_DoC_model_sim_data.tsv" docData <- read.table(dataFile, header = T) ## Look at the data head(docData) ## The first few rows dim(docData) ## Dimensions: nr of rows and nr of columns ## Check the zygosity distribution table(docData$zyg) ## Note the variable names (for one twin) from the data ## Observed/manifest indicator variables in the dataset vars <- c( ## 3 Indicators of Phenotype X "X1","X2", "X3", ## 3 Indicators of Phenotype Y "Y1", "Y2", "Y3" ) nv <- length(vars) # Number of manifest/observed variables per twin ## Duplicate the variable names for both twins ## Add appropriate suffix to the variable names to indicate twin 1 and twin 2 ## Order of variables: all vars for T1, then all vars for T2 in the same order selVars <- paste(vars, c(rep("T1",nv),rep("T2",nv)), sep = "_") selVars ## Latent common factors, i.e., the true score (one for each phenotype) latFacs <- c("Fx","Fy") nFac <- length(latFacs) # Number of latent factor true scores or common pathways ## Subset Data by zygosity ## Keep the selected variables mzdata <- subset(docData, subset = docData$zyg == "MZ", select = selVars) dzdata <- subset(docData, subset = docData$zyg == "DZ", select = selVars) str(mzdata) str(dzdata) # Descriptive Statistics ====================================================== psych::describe(mzdata) psych::describe(dzdata) # Print Means round(colMeans(mzdata,na.rm=TRUE),4) round(colMeans(dzdata,na.rm=TRUE),4) # Cross-twin within-trait correlations twin_pair_corrs <- cbind( diag2vec(round(cor(mzdata[selVars],use="complete"),4)[7:12,1:6]), diag2vec(round(cor(dzdata[selVars],use="complete"),4)[7:12,1:6]) ) colnames(twin_pair_corrs) <- c("MZ","DZ") rownames(twin_pair_corrs) <- vars twin_pair_corrs # MZ DZ # X1 0.4786 0.2514 # X2 0.4695 0.2420 # X3 0.4771 0.2533 # Y1 0.5441 0.4052 # Y2 0.5336 0.3953 # Y3 0.5197 0.4008 # Matrices in the full D0C model ================================= # Vf = common factor covariance matrix (nFac*nFac) (3 componenets: Af, Cf, Ef) # Β = causal parameters (nFac*nFac) # I = Identity matrix (nFac*nfac) # Fl = factor loadings (nv*nFac) # Vs = Manifest indicator-specific covariance (nv*nv) (3 componenets: As, Cs, Es) # (including measurement error) # Build multiple-indicator ACE model for two latent phenotypes =========== ## Specifies three sources of covariance: covA, covC, covE ## i.e., all model-expected covariance is due to background covariance (confounding), genetic and environmental ## There is no causal path between the phenotypes ## The best-fitting covariance-only model is your "null" model for the DoC model. ## ACE Variance of the latent common factors ================================ # A, C & E variance components of the latent factors (= true phenotype scores) # NB: direction of causation goes DOWN the column & OUT along the row # Matrix dimension = nFac x nFac aFree <- c(T,T,T) cFree <- c(T,T,T) eFree <- c(T,T,T) aVals <- c(0.4,0.2,0.4) cVals <- c(0.2,0.1,0.2) eVals <- c(0.4,0.2,0.4) Af <- mxMatrix(type = "Symm", nrow = nFac,ncol=nFac, labels=c("Af_x","covAf","Af_y"), free=aFree, values=aVals, dimnames = list(latFacs,latFacs), name="Af") Cf <- mxMatrix(type = "Symm", nrow = nFac,ncol=nFac, labels=c("Cf_x","covCf","Cf_y"), free=cFree, values=cVals, dimnames = list(latFacs,latFacs), name="Cf") Ef <- mxMatrix(type = "Symm", nrow = nFac,ncol=nFac, labels=c("Ef_x","covEf","Ef_y"), free=eFree, values=eVals, dimnames = list(latFacs,latFacs), name="Ef") ## Examine the matrices ## Dimensions, parameter labels, free/fixed parameters, starting values Af Cf Ef ## B - causal parameters between latent factors ================================ # NB: direction of causation goes DOWN the column & OUT the row # Matrix dimension = nFac x nFac ## We will initially fit a diphenotype twin model (two latent phenotypes) without causal paths ## So, the regression/causal paths (in the B matrix) are all fixed at 0 (for now). Bfree <- matrix(c(F,F, F, F), byrow = T, nrow = nFac, ncol = nFac, dimnames = list(latFacs,latFacs)) Bvals <- matrix(c(0,0, 0, 0), byrow = T, nrow = nFac, ncol = nFac, dimnames = list(latFacs,latFacs)) Blabs <- matrix(c(NA,"bxy", "byx",NA), byrow = T, nrow = nFac, ncol = nFac, dimnames = list(latFacs,latFacs)) B <- mxMatrix(type="Full", nrow=nFac, ncol=nFac, labels=Blabs, free=Bfree, values=Bvals, dimnames = list(latFacs,latFacs), name = "B" ) I <- mxMatrix(type="Iden", nrow=nFac, ncol=nFac, dimnames = list(latFacs,latFacs), name = "I") ## Examine the matrices ## Dimensions, parameter labels, free/fixed parameters, starting values B I ## Factor Loadings ==================================================== # Factor loadings from the latent true scores to the observed (indicator) phenotypes # Matrix dimension = nv x nFac Fl_free <- matrix(c( T,F, T,F, T,F, F,T, F,T, F,T), byrow = T, nrow = nv, ncol = nFac, dimnames = list(vars,latFacs)) Fl_vals <- matrix(c( 0.8,0.0, 0.8,0.0, 0.8,0.0, 0.0,0.8, 0.0,0.8, 0.0,0.8), byrow = T, nrow = nv, ncol = nFac, dimnames = list(vars,latFacs)) Fl_labs <- matrix(c( "f_x1Fx", NA, "f_x2Fx", NA, "f_x3Fx", NA, NA, "f_y1Fy", NA, "f_y2Fy", NA, "f_y3Fy"), byrow = T, nrow = nv, ncol = nFac, dimnames = list(vars,latFacs)) Fl <- mxMatrix(type="Full", nrow=nv, ncol=nFac, free = Fl_free, values = Fl_vals, labels=Fl_labs, dimnames = list(vars,latFacs), name = "Fl") ## Examine the matrix ## Dimensions, parameter labels, free/fixed parameters, starting values Fl ## Observed Indicator-Specific Variance ===================================== # Variance of the observed/manifest indicator variable not explained by the latent true score # Matrix dimension = nv x nv As_labs <- c("As_X1","As_X2","As_X3","As_Y1","As_Y2","As_Y3") Cs_labs <- c("Cs_X1","Cs_X2","Cs_X3","Cs_Y1","Cs_Y2","Cs_Y3") Es_labs <- c("Es_X1","Es_X2","Es_X3","Es_Y1","Es_Y2","Es_Y3") As <- mxMatrix(type = "Diag", nrow=nv, ncol=nv, labels=As_labs, free=T, values=0.05, dimnames = list(vars,vars), name="As") Cs <- mxMatrix(type = "Diag", nrow=nv, ncol=nv, labels=Cs_labs, free=T, values=0.05, dimnames = list(vars,vars), name="Cs") Es <- mxMatrix(type = "Diag", nrow=nv, ncol=nv, labels=Es_labs, free=T, lbound=0, values=0.1, dimnames = list(vars,vars), name="Es") ## Es captures the measurement error in the observed variables ## We put a lower bound of 0 on the estimates of Es ## as measurement error >= 0 ## Note that these are diagonal matrices. ## i.e., it has the all off-diagonal elements set at 0 ## i.e., we are assuming that there is no covariance between the observed indicators ## above and beyond the covariance due to the common factors (the latent phenotypes) ## This also means that we assume that the measurement error in one indicator ## is not correlated with the measurement error in the other indicators. ## Examine the matrices ## Dimensions, parameter labels, free/fixed parameters, starting values As Cs Es ## Expected covariance ====================================== # Matrix dimension = nv x nv # Three separate matrices = A, C, E # Algebra = ( Fl * (I-Β)~ * Vf * (I-Β)~' * Fl' ) + Vs # Fl = factor loadings # I = Identity matrix # Β = causal parameters # Vf = common factor (co)variance matrix (= Af | Cf | Ef) # Vs = Manifest indicator-specific (co)variance matrix (= As | Cs | Es) A <- mxAlgebra( Fl %&% (solve(I-B) %&% Af) + As, dimnames = list(vars,vars), name = "A") C <- mxAlgebra( Fl %&% (solve(I-B) %&% Cf) + Cs, dimnames = list(vars,vars), name = "C") E <- mxAlgebra( Fl %&% (solve(I-B) %&% Ef) + Es, dimnames = list(vars,vars), name = "E") # covMZ = A+C covMZ <- mxAlgebra( expression= rbind( cbind(A+C+E, A+C), cbind(A+C, A+C+E)), dimnames = list(selVars,selVars), name="expCovMZ" ) # covDZ = 0.5*A+C covDZ <- mxAlgebra( expression= rbind( cbind(A+C+E, 0.5%x%A+C), cbind(0.5%x%A+C, A+C+E)), dimnames = list(selVars,selVars), name="expCovDZ" ) ## Standardized variance Components ============================================ # Total phenotypic variance, V = A + C + E V <- mxAlgebra( expression= A+C+E, name="V" ) # Standardized variance components of individual indicators = ( variance component / total variance ) rowVC <- rep('VC', nv) colVC <- rep(c('A','C','E','SA','SC','SE'),each=nv) VC <- mxAlgebra( expression=cbind(A,C,E,A/V,C/V,E/V), dimnames=list(rowVC,colVC), name="VC") # Constrain the total variance of each latent common factor (Vf) to be == 1 unitM <- mxMatrix( type="Unit", nrow=nFac, ncol=1, name="unitM" ) Vf <- mxAlgebra( expression = (solve(I-B) %&% Af) + (solve(I-B) %&% Cf) + (solve(I-B) %&% Ef) , name="Vf" ) ConVar <- mxConstraint( expression = diag2vec(Vf)==unitM, name="ConVar" ) ## NB! We need to either fix the latent factor variance ## or one of its factor loadings at some value (usually fixed at 1). ## Here, we are doing the former (fixing the factor variance at 1). ## Latent true score means ===================================== # Mean matrix = μ # Matrix dimension = 1*nv Mean <- mxMatrix( type="Full", nrow=1, ncol=nv, free=TRUE, labels=paste0("mu_",vars), values = c(0.02,0.02,0.02,0.25,0.25,0.25), dimnames = list("mu",vars), name = "Mean" ) # μ ## Examine the matrix ## Dimensions, parameter labels, free/fixed parameters, starting values Mean ## Duplicate the means for the co-twin (i.e., the means are assumed to be equal across twin-order) expMean <- mxAlgebra( expression= cbind(Mean,Mean), name="expMean" ) ## Data and Expectation Objects for Multiple Groups =========================== # Data Objects for Multiple Groups dataMZ <- mxData( observed=mzdata, type="raw" ) dataDZ <- mxData( observed=dzdata, type="raw" ) # Expectation Objects for Multiple Groups # Define how the model expectations are calculated # The mxExpectationNormal function uses the algebra defined by the 'covariance' and 'means' arguments to define # the expected covariance and means under the assumption of multivariate normality objMZ <- mxExpectationNormal( covariance="expCovMZ", means="expMean", dimnames=selVars ) objDZ <- mxExpectationNormal( covariance="expCovDZ", means="expMean", dimnames=selVars ) # Function to compute -2*(log likelihood) of the data given the current values of the free parameters and the expectation function fiML <- mxFitFunctionML() ## There are five possible sources of covariance between (latent true scores of) X and Y ## Three possible sources of background confounding (covA, covC, covE) ## and two possible causal effects (byx, bxy). ## Save the object to estimate CIs for all five. ## In our best-fitting model, We can estimate the CIs of the parameters ## that end being freely estimated in that model. ci <- mxCI(c("Af_x","Cf_x","Ef_x","Af_y","Cf_y","Ef_y", "bxy","byx","covAf","covCf","covEf")) ## Combine all objects into a model =========================================== # Final steps ## Objects common to both submodels (MZ and DZ) pars <- list( ## Latent factor covariance matrices Af,Cf,Ef, ## Causal paths I,B, ## Factor loadings Fl, ## Observed-indicator-specific covariance matrices As,Cs,Es, ## Total/combined covariance matrices A,C,E, ## Objects for the means Mean,expMean, ## Objects for standardizing the variance component estimates V,VC, ## Constraint for the variance of the latent factors unitM,Vf,ConVar, ## object for confidence intervals ci ) ## Add MZ-specific objects to the MZ submodel modelMZ <- mxModel( pars, dataMZ, covMZ, objMZ, fiML, name="MZ" ) # Function to create MxModel objects ## Add DZ-specific objects to the DZ submodel modelDZ <- mxModel( pars, dataDZ, covDZ, objDZ, fiML, name="DZ" ) # Function to create MxModel objects ## Combine the submodels into one model obj <- mxFitFunctionMultigroup(c("MZ","DZ")) # Function used to fit a multiple group model ACE_model <- mxModel( "ACE", pars, modelMZ, modelDZ, obj ) ## Final check of the starting values of all free parameters omxGetParameters(ACE_model) # TUTORIAL #################################################################### ## ACE Model with the Two Latent True Scores ====================== ACE_fit <- mxRun( ACE_model ) ## Look at the standardized ACE estimates of all six observed variables var_comp <- cbind( round(diag2vec(ACE_fit$algebras$VC$result[1:6,19:24]),2), round(diag2vec(ACE_fit$algebras$VC$result[1:6,25:30]),2), round(diag2vec(ACE_fit$algebras$VC$result[1:6,31:36]),2)) rownames(var_comp) <- vars colnames(var_comp) <- c("A","C","E") var_comp # 1. Find the best-fitting ACE model (covariance-only) ============================ ## In this tutorial we will perform "global" tests of A and C variances ## i.e., test the joint statistical significance of A (or C, or A+C) ## for both latent true scores, plus all observed indicator variables. ### Global AE - correlated background A & E only, no causation ================= ## To test the joint significance of all C components ## Drop the shared environmental (C) variance components of the latent true scores ## Drop the shared environmental (C) variance components of the observed indicators AE <- mxModel( ACE_fit,name="AE") AE <- omxSetParameters( AE, label=c("Cf_x","covCf","Cf_y"),free=F,values=0) AE <- omxSetParameters( AE, label=Cs_labs,free=F,values=0) AE_fit <- mxRun( AE ) summary(AE_fit) ### Global CE - correlated background C & E only, no causation ================= ## To test the joint significance of all A components ## Drop the additive genetic (A) variance components of the latent true scores ## Drop the additive genetic (A) variance components of the observed indicators CE <- mxModel( ACE_fit,name="CE") CE <- omxSetParameters( CE, label=c("Af_x","covAf","Af_y"),free=F,values=0) CE <- omxSetParameters( CE, label=As_labs,free=F,values=0) CE_fit <- mxRun( CE ) summary(CE_fit) ### Global E - correlated background E only, no causation ====================== ## To test the joint significance of all A and all C components ## Drop the additive genetic (A) variance components of the latent true scores ## Drop the additive genetic (A) variance components of the observed indicators ## Drop the shared environmental (C) variance components of the latent true scores ## Drop the shared environmental (C) variance components of the observed indicators E <- mxModel( AE_fit,name="E") E <- omxSetParameters( E, label=c("Af_x","covAf","Af_y"),free=F,values=0) E <- omxSetParameters( E, label=As_labs,free=F,values=0) E_fit <- mxRun( E ) summary(E_fit) ## Note the missing Std Errors of Ef_x and Ef_y. ## In this model, Ef_x = total variance of Fx ## As we have fixed the total variance of Fx at 1. ## Ef_x also, indirectly, becomes fixed at 1. ## Hence, Ef_x and Ef_y are no longer freely estimated, and so have no S.E. ##### Compare ACE with global AE, CE and E covariance models (no causation) ========== subs <- c( AE_fit, CE_fit, E_fit ) comps <- mxCompare( ACE_fit,subs ) comps ## These models assume no causation between X and Y. ## So, the best-fitting model is our "null" model for further causal tests. ## Specify the best-fitting model as "null" model ## For example, if ACE_fit is the best-fitting model, ## nullACE_fit <- ACE_fit nullACE_fit <- ??? ## Now, examine the ACE estimates of X and Y. ## Refit the model with intervals=TRUE nullACE_fit <- mxRun(nullACE_fit, intervals = T) round(nullACE_fit$output$confidenceIntervals,3) ## NB: If the two phenotypes have similar ACE estimates, ## this pair of phenotypes may not be suitable for DoC modeling! # 2. Test the DoC model (No background covariance/confounding) ================ ## We'll now test the DoC model, i.e., the causal paths between Fx and Fy ### Causation from Fx to Fy. No background ACE covariance =============== Fx_to_Fy <- mxModel( nullACE_fit, name="ACE_byx") ## Drop the background ACE covariances in the null model Fx_to_Fy <- omxSetParameters( Fx_to_Fy, label=c("covAf","covCf","covEf"),free=FALSE,values=0) ## Freely estimate the causal path from Fx to Fy (byx) Fx_to_Fy <- omxSetParameters( Fx_to_Fy, label="byx",free=TRUE,values=0.2) Fx_to_Fy_fit <- mxRun( Fx_to_Fy ) summary( Fx_to_Fy_fit ) ### Causation from Fy to Fx. No background ACE covariance =============== Fy_to_Fx <- mxModel( nullACE_fit, name="ACE_bxy") ## Drop the background ACE covariances in the null model Fy_to_Fx <- omxSetParameters( Fy_to_Fx, label=c("covAf","covCf","covEf"),free=FALSE,values=0) ## Freely estimate the causal path from Fy to Fx (bxy) Fy_to_Fx <- omxSetParameters( Fy_to_Fx, label="bxy",free=TRUE,values=0.2) Fy_to_Fx_fit <- mxTryHard( Fy_to_Fx ) summary( Fy_to_Fx_fit ) ### Bidirectional Causation between Fy to Fx. No background ACE covariance =============== recip_doc <- mxModel( nullACE_fit,name="ACE_recip") ## Drop the background ACE covariances in the null model recip_doc <- omxSetParameters( recip_doc, label=c("covAf","covCf","covEf"),free=FALSE,values=0) ## Estimate the causal path from Fx to Fy (byx) recip_doc <- omxSetParameters( recip_doc, label=c("byx"),free=TRUE,values=0.2) ## as well as the causal path from Fy to Fx (bxy) recip_doc <- omxSetParameters( recip_doc, label=c("bxy"),free=TRUE,values=0.1) recip_doc_fit <- mxRun( recip_doc ) summary( recip_doc_fit ) ##### Compare the fit of the DoC models against the null ======================== subs <- c( Fx_to_Fy_fit, Fy_to_Fx_fit, recip_doc_fit ) comps <- mxCompare( nullACE_fit, subs ) comps ## We may also compare the X_to_Y DOC model to the reciprocal DOC model mxCompare( recip_doc_fit, Fx_to_Fy_fit ) ## What are the model-implied sources of covariance between X and Y ## in the best-fitting model? ## **covariance may be due to causation or background confounding. ## Estimate the confidence intervals of these parameters in the best-fitting model. ## Fill in the fitted model object of the best-fitting model ## e.g., if Fx_to_Fy_fit is the best-fitting model, ## best_fit <- Fx_to_Fy_fit best_fit <- ??? best_fit <- mxRun( best_fit, intervals = TRUE) round(best_fit$output$confidenceIntervals, 3) # BONUS: Test a hybrid model (causation + confounding) ============================= ## In "bivariate" twin models, we can usually estimate up to **three** sources of covariance ## between the two phenotypes (in this example, the two latent true scores). ## For example, we may freely estimate background A covariance, plus reciprocal causation ## Try hybrid model covA, byx, bxy ACE_hybrid <- mxModel( ACE_fit,name="Hybrid_covA_byx_bxy") ## Drop the background confounding due to C and E ACE_hybrid <- omxSetParameters( ACE_hybrid, label=c("covCf","covEf"),free=FALSE,values=0) ## Estimate the bidirectional causal paths between Fx and Fy ACE_hybrid <- omxSetParameters( ACE_hybrid, label=c("byx","bxy"),free=TRUE,values=c(0.2,0.1)) ACE_hybrid_fit <- mxRun( ACE_hybrid, intervals = T ) summary( ACE_hybrid_fit ) round(ACE_hybrid_fit$output$confidenceIntervals, 3) # Compare the model fit # NB: the causation-only DOC model is a restricted, nested model of the hybrid model mxCompare( ACE_hybrid_fit, Fx_to_Fy_fit ) # END. ===========