# Data Generation # Multiple indicator DOC model # MadhurBain Singh, Nathan Gillespie, Hermine Maes # March 8, 2024 library(OpenMx) mxOption(NULL, 'Number of Threads', parallel::detectCores()) mxVersion() # Variables ========================================= ## Observed/Manifest variables in the dataset vars <- c( ## Indicators of Phenotype X "X1","X2", "X3", ## Indicators of Phenotype Y "Y1", "Y2", "Y3" ) nv <- length(vars) # Number of manifest/observed variables per twin ## Duplicate the variable names for both twins ## Add appropriate suffix to the variable names to indicate twin 1 and twin 2 ## Order of variables: all vars for T1, then all vars for T2 in the same order selVars <- paste(vars, c(rep("T1",nv),rep("T2",nv)), sep = "_") ntv <- length(selVars) selVars ## Latent common factors, i.e., the true score (one for each phenotype) latFacs <- c("Fx","Fy") nFac <- length(latFacs) # Number of latent factor true scores or common pathways # Variance of the latent common factors ================================ # Sources of A, C & E variance on the latent factors or common pathways # NB: direction of causation goes DOWN the column & OUT along the row # nFac x nFac aFree <- c(T,F, F, T) cFree <- c(T,F, F, T) eFree <- c(T,F, F, T) aVals <- c(0.50,0, 0 ,0.15) cVals <- c(0 ,0, 0 ,0.35) eVals <- c(0.50,0, 0 ,0.40) Af <- mxMatrix(type = "Symm", nrow = nFac,ncol=nFac, labels=c("Af_x","covAf","covAf","Af_y"), free=aFree, values=aVals, dimnames = list(latFacs,latFacs), name="Af") Cf <- mxMatrix(type = "Symm", nrow = nFac,ncol=nFac, labels=c("Cf_x","covCf","covCf","Cf_y"), free=cFree, values=cVals, dimnames = list(latFacs,latFacs), name="Cf") Ef <- mxMatrix(type = "Symm", nrow = nFac,ncol=nFac, labels=c("Ef_x","covEf","covEf","E_Fy"), free=eFree, values=eVals, dimnames = list(latFacs,latFacs), name="Ef") Af Cf Ef # var Fx = 0.50+0.00+0.50 = 1 # var Fy = 0.15+0.35+0.40+0.3162278^2 = 1 # B - causal parameters between latent factors ================================ # NB: direction of causation goes DOWN the column & OUT along the row # nFac x nFac Bfree <- matrix(c(F,F, T,F), byrow = T, nrow = nFac, ncol = nFac, dimnames = list(latFacs,latFacs)) Bvals <- matrix(c(0,0, 0.3162278,0), byrow = T, nrow = nFac, ncol = nFac, dimnames = list(latFacs,latFacs)) Blabs <- matrix(c(NA,"bxy", "byx",NA), byrow = T, nrow = nFac, ncol = nFac, dimnames = list(latFacs,latFacs)) B <- mxMatrix(type="Full", nrow=nFac, ncol=nFac, labels=Blabs, free=Bfree, values=Bvals, dimnames = list(latFacs,latFacs), name = "B" ) I <- mxMatrix(type="Iden", nrow=nFac, ncol=nFac, dimnames = list(latFacs,latFacs), name = "I") # Factor Loadings ==================================================== # Factor loadings from the latent true scores to the observed (indicator) phenotypes # nv x nFac Fl_free <- matrix(c( T,F, T, F, T, F, F, T, F, T, F, T), byrow = T, nrow = nv, ncol = nFac, dimnames = list(vars,latFacs)) Fl_vals <- matrix(c( 0.95,0.00, 0.90,0.00, 0.85,0.00 , 0.00,0.95, 0.00,0.90, 0.00,0.85), byrow = T, nrow = nv, ncol = nFac, dimnames = list(vars,latFacs)) Fl_labs <- matrix(c( "Fl_x1Fx",NA, "Fl_x2Fx", NA, "Fl_x3Fx", NA, NA, "Fl_y1Fy", NA, "Fl_y2Fy", NA, "Fl_y3Fy"), byrow = T, nrow = nv, ncol = nFac, dimnames = list(vars,latFacs)) Fl <- mxMatrix(type="Full", nrow=nv, ncol=nFac, free = Fl_free, values = Fl_vals, labels=Fl_labs, dimnames = list(vars,latFacs), name = "Fl") Fl # Indicator-Specific Variance =========================================== # measurement error/residuals in each observed/manifest item/variable # (not explained by the latent true scores) # nv x nv As_labs <- c("As_X1","As_X2","As_X3","As_Y1","As_Y2","As_Y3") Cs_labs <- c("Cs_X1","Cs_X2","Cs_X3","Cs_Y1","Cs_Y2","Cs_Y3") Es_labs <- c("Es_X1","Es_X2","Es_X3","Es_Y1","Es_Y2","Es_Y3") As <- mxMatrix(type = "Diag", nrow=nv, ncol=nv, labels=As_labs, free=T, lbound=0, values=0.03, dimnames = list(vars,vars), name="As") Cs <- mxMatrix(type = "Diag", nrow=nv, ncol=nv, labels=Cs_labs, free=T, lbound=0, values=0.00, dimnames = list(vars,vars), name="Cs") Es <- mxMatrix(type = "Diag", nrow=nv, ncol=nv, labels=Es_labs, free=T, lbound=0, values=0.05, dimnames = list(vars,vars), name="Es") # Expected covariance ====================================== # [ Fl * (I-Β)~ * Vf * (I-Β)~' * Fl' ] + Vs # Fl = factor loadings # I = Identity matrix # Β = causal parameters # Vf = common factor covariance matrix # Vs = Manifest indicator-specific covariance / measurement error A <- mxAlgebra( Fl %&% (solve(I-B) %&% Af) + As, dimnames = list(vars,vars), name = "A") C <- mxAlgebra( Fl %&% (solve(I-B) %&% Cf) + Cs, dimnames = list(vars,vars), name = "C") E <- mxAlgebra( Fl %&% (solve(I-B) %&% Ef) + Es, dimnames = list(vars,vars), name = "E") # covMZ = A+C covMZ <- mxAlgebra( expression= rbind( cbind(A+C+E, A+C), cbind(A+C, A+C+E)), dimnames = list(selVars,selVars), name="expCovMZ" ) # covDZ = 0.5A + C covDZ <- mxAlgebra( expression= rbind( cbind(A+C+E, 0.5%x%A+C), cbind(0.5%x%A+C, A+C+E)), dimnames = list(selVars,selVars), name="expCovDZ" ) # Standardized variance Components ============================================ V <- mxAlgebra( expression= A+C+E, name="V" ) rowVC <- rep('VC', nv) colVC <- rep(c('A','C','E','SA','SC','SE'),each=nv) VC <- mxAlgebra( expression=cbind(A,C,E,A/V,C/V,E/V), dimnames=list(rowVC,colVC), name="VC") # Constrain the variance of the latent factors == 1 unitM <- mxMatrix( type="Unit", nrow=2, ncol=1, name="unitM" ) ConVar <- mxConstraint( expression=diag2vec( (solve(I-B) %&% Af) + (solve(I-B) %&% Cf) + (solve(I-B) %&% Ef) )==unitM, name="ConVar" ) # Latent true score means ===================================== # Mean matrix = μ # Factor mean = ((I-Β)~ * μ)' Adjusts each mean for contribution from causal pathway # Manifest mean = (Λ * ( (I-Β)~ * μ ))' Mean <- mxMatrix( type="Full", nrow=1, ncol=nv, free=TRUE, labels=paste0("mu_",vars), values = c(rep(0.01,3),rep(0.30,3)), dimnames = list("mu",vars), name = "Mean" ) # μ Mean # FacMean <- mxAlgebra( solve(I-B) %*% Mean, name="FacMean" ) # ManMean <- mxAlgebra( t(Fl %*% FacMean ), name="ManMean" ) expMean <- mxAlgebra( expression= cbind(Mean,Mean), name="expMean" ) # Expectation Objects for Multiple Groups ================================= # Define how the model expectations are calculated # The mxExpectationNormal function uses the algebra defined by the 'covariance' and 'means' arguments to define # the expected covariance and means under the assumption of multivariate normality objMZ <- mxExpectationNormal( covariance="expCovMZ", means="expMean", dimnames=selVars ) objDZ <- mxExpectationNormal( covariance="expCovDZ", means="expMean", dimnames=selVars ) # Function to compute -2*(log likelihood) of the data # given the current values of the free parameters and the expectation function fiML <- mxFitFunctionML() # Combine all objects into a model =========================================== pars <- list( Af,Cf,Ef, I,B, Fl, As,Cs,Es, A,C,E, Mean,expMean, V,VC, unitM,ConVar ) modelMZ <- mxModel( pars, covMZ, objMZ, fiML, name="MZ" ) # Function to create MxModel objects , dataMZ modelDZ <- mxModel( pars, covDZ, objDZ, fiML, name="DZ" ) # Function to create MxModel objects , dataDZ obj <- mxFitFunctionMultigroup(c("MZ","DZ")) # Function used to fit a multiple group model ACE_model <- mxModel( "ACE", pars, modelMZ, modelDZ, obj ) ## Check the simulation parameters omxGetParameters(ACE_model) # Generate Data =========================================== set.seed(2024) ACE_model_sim_data <- mxGenerateData(ACE_model, empirical=FALSE, nrows = 2000) str(ACE_model_sim_data) ## Add zygosity ACE_model_sim_data$MZ$zyg <- "MZ" ACE_model_sim_data$DZ$zyg <- "DZ" str(ACE_model_sim_data$MZ) str(ACE_model_sim_data$DZ) ## rbind docData <- data.frame(rbind(ACE_model_sim_data$MZ,ACE_model_sim_data$DZ)) ## Add family id set.seed(2024) docData$FID <- sample(10001:14000) docData <- docData[order(docData$FID),] docData <- docData[,c("FID","zyg",selVars)] ## verify head(docData) table(docData$zyg) ## Save write.table(docData,file="Data/ACE_DoC_model_sim_data.tsv", quote = F, row.names = F, col.names = T) # test <- read.table("Data/ACE_DoC_model_sim_data.tsv", header = T) # str(test)