# Mendelian randomization using the twin design
# Boulder Workshop 2024
# Luis Araujo
# This script is a modified version of the Minica & Neale 2018 AGES workshop

# In this script we will test the (bidirectional) causal effect between two
# phenotypes, think of BMI on SBP and vice-versa, and polygenic scores for each.
# In the interest of brevity we will always refer to BMI as variable X, and SBP
# as variable Y, with the respective instruments iX and iY.
#
# There are four main take-away messages you should focus on:
# 1. How to set up a simulation in OpenMx using mxGenerateData
# 2. That a two-stage least squares test can be specified as a SEM in OpenMx
# 3. That MR-DoC recovers the same estimates as the 2SLS test
# 4. That in the presence of horizontal pleiotropy estimates are biased
#    in the 2SLS test and unbiased in MR-DoC
#
# The script contains four scenarios:
# - no pleiotropy between the instrument and the outcome (Scenario 1),
# - where there is pleiotropy (Scenario 2),
# - presence of a bidirectional causal effect (Scenario 3),
# - absence of a causal effect (Scenario 4).
# See the presentation slides for path diagrams.
# If you are using RStudio you can navigate using the outline dropdown menu.

# Setting the stage ------------------------------------------------------------
rm(list = ls()) # Emptying the R environment; not usually recommended, but
                # useful for this workshop

# loading required packages
library(OpenMx)
library(MASS)
library(dplyr)

options(digits = 2, scipen = 999) # we don't want scientific notation
mxOption(NULL, "Default optimizer", "SLSQP")

# The models are specified in three objects: top (for the common parts), MZ and
# DZ. Spend some time recognizing the elements in the model; by now you should
# have seen similar code. Notice two minor style changes: I am naming the
# objects at the beginning, and the matrix labels are spelled out in the
# positions they will occupy in the matrix.
# The final objects (mrdoc1, mrdoc2) will be reused throughout the script.
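# An optional note before the matrix setup: the mxAlgebra section below uses
# OpenMx's quadratic-product operator %&%, where M %&% S stands for
# M %*% S %*% t(M). Since LY is fixed to an identity matrix, each variance
# component ends up pre- and post-multiplied by solve(I - BE), i.e. the usual
# reduced form of a simultaneous-equations model. A small base-R sketch of that
# calculation (reduced_form() is just an illustration and is not used later):
reduced_form <- function(B, S) {
  Binv <- solve(diag(nrow(B)) - B) # (I - B)^{-1}
  Binv %*% S %*% t(Binv)           # reduced-form (co)variance of the phenotypes
}
# e.g. with a toy 2x2 regression matrix and a toy covariance matrix:
reduced_form(B = matrix(c(0, 0.3, 0, 0), 2, 2, byrow = TRUE), S = diag(2))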
# The matrix containing regressions for causal paths and instruments
BE <- mxMatrix(name = "BE", type = "Full", nrow = 3, ncol = 3, byrow = TRUE,
               labels = c(NA,   "g2", "b1",
                          "g1", NA,   "b2",
                          NA,   NA,   NA),
               free = c(FALSE, FALSE, TRUE,
                        TRUE,  FALSE, TRUE,
                        FALSE, FALSE, FALSE),
               dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

# ACE decomposition
A <- mxMatrix(name = 'A', type = 'Symm', nrow = 3, ncol = 3, byrow = TRUE,
              labels = c("ax2",  "covA", NA,
                         "covA", "ay2",  NA,
                         NA,     NA,     "sigma_x"),
              free = c(TRUE,  TRUE,  FALSE,
                       TRUE,  TRUE,  FALSE,
                       FALSE, FALSE, TRUE),
              dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

C <- mxMatrix(name = 'C', type = 'Symm', nrow = 3, ncol = 3, byrow = TRUE,
              labels = c("cx2",  "covC", NA,
                         "covC", "cy2",  NA,
                         NA,     NA,     NA),
              free = c(TRUE,  TRUE,  FALSE,
                       TRUE,  TRUE,  FALSE,
                       FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

E <- mxMatrix(name = 'E', type = 'Symm', nrow = 3, ncol = 3, byrow = TRUE,
              labels = c("ex2",  "covE", NA,
                         "covE", "ey2",  NA,
                         NA,     NA,     NA),
              free = c(TRUE,  FALSE, FALSE,
                       FALSE, TRUE,  FALSE,
                       FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

# The filter matrix is used to remove the PRS from _T2 in MZs;
# if we kept it, that column would be redundant and the Hessian
# would not be positive definite
filter <- mxMatrix(name = 'filter', type = 'Full', nrow = 5, ncol = 6,
                   free = FALSE, byrow = TRUE,
                   values = c(1,0,0,0,0,0,
                              0,1,0,0,0,0,
                              0,0,1,0,0,0,
                              0,0,0,1,0,0,
                              0,0,0,0,1,0),
                   dimnames = list(c("X_T1", "Y_T1", "iX_T1", "X_T2", "Y_T2"),
                                   c("X_T1", "Y_T1", "iX_T1", "X_T2", "Y_T2", "iX_T2")))

# This lambda (LY) matrix fixes the loadings for the PRS and the phenotypes to 1
LY <- mxMatrix(name = 'LY', type = 'Full', nrow = 3, ncol = 3, free = FALSE,
               values = diag(3), labels = NA,
               dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

# Means objects
mean_dz <- mxMatrix(name = 'mean_dz', type = 'Full', nrow = 1, ncol = 6,
                    free = TRUE, values = 0, byrow = TRUE,
                    labels = c('mX1', 'mY2', 'miX1', 'mX1', 'mY2', 'miX1'))
mean_mz <- mxAlgebra('mean_mz', expression = mean_dz %*% t(filter))

# Identity matrix for the algebras
I <- mxMatrix(name = 'I', type = 'Iden', nrow = 3, ncol = 3)

algebras <- list(
  mxAlgebra('A_', expression = LY %&% solve(I - BE) %&% A),
  mxAlgebra('C_', expression = LY %&% solve(I - BE) %&% C),
  mxAlgebra('E_', expression = LY %&% solve(I - BE) %&% E),
  mxAlgebra('SPh', expression = A_ + C_ + E_),
  mxAlgebra('variance_mz_', expression = rbind(
    cbind(SPh, A_ + C_),
    cbind(A_ + C_, SPh))),
  mxAlgebra('variance_dz', expression = rbind(
    cbind(SPh, .5 %x% A_ + C_),
    cbind(.5 %x% A_ + C_, SPh))),
  mxAlgebra('variance_mz', expression = filter %&% variance_mz_))

top_mr1 <- mxModel("top", BE, A, C, E, filter, LY, mean_dz, mean_mz, I, algebras)

# Preparing the objects for the multiple-group (MZ, DZ) analysis
MZ_mr1 <- mxModel("MZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_mz",
                                      means = "top.mean_mz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1",
                                                   "X_T2", "Y_T2")))
DZ_mr1 <- mxModel("DZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_dz",
                                      means = "top.mean_dz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1",
                                                   "X_T2", "Y_T2", "iX_T2")))

# Combining objects to generate the final model
mrdoc1 <- mxModel("mrdoc1", top_mr1, MZ_mr1, DZ_mr1,
                  mxFitFunctionMultigroup(c("MZ", "DZ")))
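# An optional quick check: listing the free parameters of the model we just
# built makes it easier to verify the labels used with omxSetParameters() in
# the simulations below (omxGetParameters() returns them as a named vector).
omxGetParameters(mrdoc1)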
# Scenario 1: no pleiotropy ---------------------------------------------------
# Generate simulated data for mrdoc1
# Fit the model in unrelateds using a structural equation model
# Fit the model in twins using MR-DoC
# Compare the NCPs of the models in unrelateds and twins
# Look at the 2-stage least squares results

# Let's generate simulated data where b2 (the pleiotropic path) is zero;
# in other words, no pleiotropy is present in the data.
# As such we need to set the true values; this is one way of doing it:
true_model <- mrdoc1 |>
  omxSetParameters(labels = c("g1", "b1", "b2", "ax2", "ay2", "cx2", "cy2",
                              "ex2", "ey2", "covA", "covC", "covE", "sigma_x"),
                   values = c(0.316, 0.316, 0, 0.424, 0.671, 0.671, 0.424,
                              0.519, 0.519, 0.411, 0.221, 0, 1)) |>
  omxSetParameters(labels = c("b2"), free = FALSE) # remember, no pleiotropy

# Simulating data according to the exact covariance matrix from mrdoc1 (above);
# this is done using the switch empirical = TRUE, and should be quick
sim_data <- mxGenerateData(true_model, nrows = 1000, empirical = TRUE)

# The data does not come with the instrument column for the second twin, so we
# have to duplicate the column. In your own analysis this step will not be
# needed; we are doing it because of how I coded the simulation for this
# workshop.
sim_data$MZ <- mutate(sim_data$MZ, iX_T2 = iX_T1)
dnpmz <- sim_data$MZ
dnpdz <- sim_data$DZ

dim(dnpdz)
# head(dnpdz)
summary(dnpdz)
dim(dnpmz)
# head(dnpmz)
summary(dnpmz)

# We now add the data to the model object created before.
# Notice that the plus sign is overloaded in OpenMx as it is, for example,
# in ggplot2. So you can combine objects with the syntax below instead of
# `model = mxModel(model, mxData(data, type = "raw"))`.
# This syntax is optional, but it helps reduce the length of the script.
mrdoc1$MZ <- mrdoc1$MZ + mxData(dnpmz, type = "raw")
mrdoc1$DZ <- mrdoc1$DZ + mxData(dnpdz, type = "raw")

m1 <- mrdoc1 |>
  # Remember, no b2 in the data, no b2 in the model
  omxSetParameters(labels = "b2", free = FALSE) |>
  # The point of examining the data a few lines above is to set sensible
  # starting values for the model. Here, in the interest of brevity, we ask
  # OpenMx to pick starting values for us.
  mxAutoStart()
m1 <- mxRun(m1)

# In your analyses, make sure to test for local identification frequently:
# mxCheckIdentification(m1)$status # Model is locally identified
# [1] TRUE
summary(m1)
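# An optional sanity check on the simulation: because mxGenerateData() was
# called with empirical = TRUE, the sample covariance of the simulated DZ data
# should closely match the expected covariance of the fitted model. (This
# assumes your OpenMx version exposes mxGetExpected() with a subname argument;
# see ?mxGetExpected.)
round(cov(dnpdz), 2)
round(mxGetExpected(m1, "covariance", subname = "DZ"), 2)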
# One way of assessing whether the causal path is significant is by dropping it
m2 <- omxSetParameters(m1, name = "nog1", labels = "g1", free = FALSE, values = 0)
m2 <- mxRun(m2)

# Comparing the two models
mxCompare(m1, m2)
# Q1: We dropped the causal path; what is the interpretation of the above
# result?

# Can we specify a model for a 2-stage least squares test using SEM and OpenMx?
IVModel <- mxModel("MR", type = "RAM",
                   manifestVars = c("X", "Y", "I"),
                   latentVars = c("eX", "eY"),
                   # Path from instrument to exposure
                   mxPath(from = "I", to = "X", arrows = 1, labels = "b1"),
                   # Path from exposure to outcome, g1
                   mxPath(from = "X", to = "Y", labels = "g1"),
                   # Variance of the instrument
                   mxPath(from = c("I"), arrows = 2, labels = "vI"),
                   # Latent error terms loading on the observed variables (fixed to 1)
                   mxPath(from = c("eX", "eY"), to = c("X", "Y"),
                          values = 1, free = FALSE),
                   # Variance of the residual errors
                   mxPath(from = c("eX", "eY"), arrows = 2, free = TRUE,
                          labels = c("vX", "vY")),
                   # Correlation among the residuals
                   mxPath(connect = "unique.bivariate", from = c("eX", "eY"),
                          arrows = 2, values = 0.2, labels = "re"),
                   # Means
                   mxPath("one", to = c("X", "Y", "I"),
                          labels = c("mX", "mY", "mI")))

# The above specification closely follows the slides in the presentation.
# If you have umx installed you can look at it:
# library(umx)
# plot(IVModel)

# Let's generate a new data set by taking only twin 1 from the MZs and DZs
dat1 <- rbind(dnpmz[, 1:3], dnpdz[, 1:3]) |>
  # we need to rename the variables to match the dimension names set in the
  # IVModel above
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

# Adding the data to the model
unrel <- IVModel + mxData(dat1, type = "raw")
# Know your data: check variable skewness, kurtosis and means; but
# in the interest of brevity, let's autostart
unrel <- mxAutoStart(unrel)
unrel <- mxRun(unrel)
summary(unrel)

unrel2 <- omxSetParameters(unrel, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
unrel2 <- mxRun(unrel2)
mxCompare(unrel, unrel2)
# Q2: The line above is a likelihood ratio test with 1 degree of freedom; what
# is the meaning of a significant p-value in this case?

# Now let's compare with a typical 2SLS test
TSLS1 <- lm(X ~ I, data = dat1)
Xhat  <- predict(TSLS1)
TSLS2 <- lm(Y ~ Xhat, data = dat1)
summary(TSLS2)
# In the specialized ivreg package the syntax would be:
# library(ivreg)
# TSLS2 <- ivreg(Y ~ X | I, data = dat1)

# Q3: Check out the estimate for Xhat in the previous summary and compare it to
# the unrel estimate. Did MR-DoC estimate the same values as 2SLS?

# Now let's check the power to reject the hypothesis g1 = 0 using the
# non-centrality parameter for related and unrelated individuals.
lambdam1 <- mxCompare(m1, m2)[2, 7]
dfs      <- mxCompare(m1, m2)[2, 8]
alpha    <- 0.05
ca       <- qchisq(alpha, dfs, ncp = 0, lower.tail = FALSE)
powerm1  <- pchisq(ca, dfs, ncp = lambdam1, lower.tail = FALSE)
powerm1

lambdaunrel <- mxCompare(unrel, unrel2)[2, 7]
dfs         <- mxCompare(unrel, unrel2)[2, 8]
alpha       <- 0.05 # user specified: type I error probability
ca          <- qchisq(alpha, dfs, ncp = 0, lower.tail = FALSE)
powerUnrel  <- pchisq(ca, dfs, ncp = lambdaunrel, lower.tail = FALSE)
powerUnrel
# Q4: Which method had better power to detect the causal effect?
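# An optional extension of the power calculation above: because the NCP of a
# likelihood ratio test scales (approximately) linearly with sample size, it
# can be inverted to ask how many twin pairs would be needed to reach, say,
# 80% power. ncp_for_power() below is a small helper written for this aside,
# not an OpenMx function.
ncp_for_power <- function(target = 0.80, df = 1, alpha = 0.05) {
  crit <- qchisq(alpha, df, lower.tail = FALSE)
  uniroot(function(ncp) pchisq(crit, df, ncp = ncp, lower.tail = FALSE) - target,
          interval = c(0, 100))$root
}
# Approximate number of twin pairs needed for 80% power in the MR-DoC test,
# scaling from the number of pairs we actually simulated:
(nrow(dnpmz) + nrow(dnpdz)) * ncp_for_power() / lambdam1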
# Scenario 2: Pleiotropy -------------------------------------------------
# Next consider the scenario with pleiotropy; assume re = 0 and test g1 = 0.
# Check the results: does the MR-DoC model correctly recover the parameters
# b2, g1, and b1?
# Do we detect a causal effect if we don't account for pleiotropy
# (SEM, 2-stage least squares, 2-sample MR)?

sim_data2 <- mrdoc1 |>
  omxSetParameters(labels = c("g1", "b1", "b2", "ax2", "ay2", "cx2", "cy2",
                              "ex2", "ey2", "covA", "covC", "covE", "sigma_x"),
                   values = c(0.143, 0.316, 0.127, 0.424, 0.670, 0.670, 0.424,
                              0.519, 0.519, 0.411, 0.221, 0, 1)) |>
  mxGenerateData(nrows = 1000, empirical = TRUE)

sim_data2$MZ <- mutate(sim_data2$MZ, iX_T2 = iX_T1)
dwpmz <- sim_data2$MZ
dwpdz <- sim_data2$DZ

dim(dwpdz)
# head(dwpdz)
summary(dwpdz)
dim(dwpmz)
# head(dwpmz)
summary(dwpmz)

pleio <- mrdoc1
pleio$MZ <- mrdoc1$MZ + mxData(dwpmz, type = "raw")
pleio$DZ <- mrdoc1$DZ + mxData(dwpdz, type = "raw")
pleio <- mxRun(pleio)
summary(pleio)
# parameters used for simulation
# g1 = 0.143
# b1 = 0.316
# b2 = 0.127
# Q5: Check the results: does the MR-DoC model correctly recover the parameters
# b2, g1, and b1? Hint: look at the true values used for simulation.

## MR-DoC: Test g1=0 using a likelihood ratio test #########
pleio2 <- omxSetParameters(pleio, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
pleio2 <- mxRun(pleio2)
mxCompare(pleio, pleio2)

## Let's run in unrelateds: ---------------------------------------------------
# take twin 1 from MZ and DZ
dat2 <- rbind(dwpmz[, 1:3], dwpdz[, 1:3]) |>
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

unrel3 <- IVModel + mxData(dat2, type = "raw")
# in the interest of brevity, let's autostart the model
unrel3 <- mxAutoStart(unrel3)
unrel3 <- mxRun(unrel3)
summary(unrel3)
# parameters used for simulation
# g1 = 0.143
# b1 = 0.316
# b2 = 0.127
# Q6: Does the 2SLS model correctly recover the parameters used for simulation?

unrel4 <- omxSetParameters(unrel3, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
unrel4 <- mxRun(unrel4)
mxCompare(unrel3, unrel4)

## Which model has the largest power? (look at the chi-square difference) -----
## MR-DoC in twins
chisq_Twins <- mxCompare(pleio, pleio2)[2, 7]
chisq_Twins
## MR-SEM in unrelateds
chisq_Unrel <- mxCompare(unrel3, unrel4)[2, 7]
chisq_Unrel
# Q7: Conclusion? The larger the difference, the higher the power.

## Fit the model using two-stage least squares ####################
TSLS1 <- lm(X ~ I, data = dat2)
Xhat  <- predict(TSLS1)
TSLS2 <- lm(Y ~ Xhat, data = dat2)
summary(TSLS2)
# Check above that the 2SLS test, using either lm or SEM, still finds the same
# estimates.
# Q8: Do we detect a causal effect if we don't account for pleiotropy (SEM,
# 2-stage least squares)?
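# An optional note on the size of the bias: with a pleiotropic path b2, the
# probability limit of the 2SLS / MR-SEM estimate is not g1 but roughly
# g1 + b2 / b1, because the exclusion restriction is violated. With the
# simulated values:
0.143 + 0.127 / 0.316 # ~0.55, about what the biased estimates above show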
# Scenario 3. Bidirectional causation -------------------------------------------
# Specifying the mrdoc2 model. This is similar to mrdoc1 except with larger
# matrices (one more instrument).

# Matrix for causal paths
BE <- mxMatrix(name = "BE", type = "Full", nrow = 4, ncol = 4, byrow = TRUE,
               labels = c(NA,   "g2", "b1", "b4",
                          "g1", NA,   "b2", "b3",
                          NA,   NA,   NA,   NA,
                          NA,   NA,   NA,   NA),
               free = c(FALSE, TRUE,  TRUE,  FALSE,
                        TRUE,  FALSE, FALSE, TRUE,
                        FALSE, FALSE, FALSE, FALSE,
                        FALSE, FALSE, FALSE, FALSE),
               dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

# A, C and E decomposition
A <- mxMatrix(name = 'A', type = 'Symm', nrow = 4, ncol = 4, byrow = TRUE,
              labels = c("ax2",  "covA", NA,   NA,
                         "covA", "ay2",  NA,   NA,
                         NA,     NA,     "x2", "rf",
                         NA,     NA,     "rf", "y2"),
              free = c(TRUE,  TRUE,  FALSE, FALSE,
                       TRUE,  TRUE,  FALSE, FALSE,
                       FALSE, FALSE, TRUE,  TRUE,
                       FALSE, FALSE, TRUE,  TRUE),
              dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

C <- mxMatrix(name = 'C', type = 'Symm', nrow = 4, ncol = 4, byrow = TRUE,
              labels = c("cx2",  "covC", NA, NA,
                         "covC", "cy2",  NA, NA,
                         NA,     NA,     NA, NA,
                         NA,     NA,     NA, NA),
              free = c(TRUE,  TRUE,  FALSE, FALSE,
                       TRUE,  TRUE,  FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

E <- mxMatrix(name = 'E', type = 'Symm', nrow = 4, ncol = 4, byrow = TRUE,
              labels = c("ex2",  "covE", NA, NA,
                         "covE", "ey2",  NA, NA,
                         NA,     NA,     NA, NA,
                         NA,     NA,     NA, NA),
              free = c(TRUE,  TRUE,  FALSE, FALSE,
                       TRUE,  TRUE,  FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

# A filter matrix, as we need to remove the PRSs from twin 2
filter <- mxMatrix(name = 'filter', type = 'Full', nrow = 6, ncol = 8,
                   free = FALSE, byrow = TRUE,
                   values = c(1,0,0,0,0,0,0,0,
                              0,1,0,0,0,0,0,0,
                              0,0,1,0,0,0,0,0,
                              0,0,0,1,0,0,0,0,
                              0,0,0,0,1,0,0,0,
                              0,0,0,0,0,1,0,0),
                   dimnames = list(c("X_T1", "Y_T1", "iX_T1", "iY_T1", "X_T2", "Y_T2"),
                                   c("X_T1", "Y_T1", "iX_T1", "iY_T1",
                                     "X_T2", "Y_T2", "iX_T2", "iY_T2")))

LY <- mxMatrix(name = 'LY', type = 'Full', nrow = 4, ncol = 4, free = FALSE,
               values = diag(4), labels = NA,
               dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

# The object with the means
mean_dz <- mxMatrix(name = 'mean_dz', type = 'Full', nrow = 1, ncol = 8,
                    free = TRUE, byrow = TRUE, values = 0,
                    labels = c('mX1', 'mY2', 'miX1', 'miY2',
                               'mX1', 'mY2', 'miX1', 'miY2'))

algebras <- list(
  # Removing the PRS from the MZ means
  mxAlgebra('mean_mz', expression = mean_dz %*% t(filter)),
  # Identity matrix for the algebra calculations
  mxMatrix(name = 'I', type = 'Iden', nrow = 4, ncol = 4),
  # The matrices needed for calculating the variances
  mxAlgebra('A_', expression = LY %&% solve(I - BE) %&% A),
  mxAlgebra('C_', expression = LY %&% solve(I - BE) %&% C),
  mxAlgebra('E_', expression = LY %&% solve(I - BE) %&% E),
  mxAlgebra('full_variance', expression = A_ + C_ + E_),
  mxAlgebra('variance_mz_', expression = rbind(
    cbind(full_variance, A_ + C_),
    cbind(A_ + C_, full_variance))),
  mxAlgebra('variance_dz', expression = rbind(
    cbind(full_variance, 0.5 %x% A_ + C_),
    cbind(0.5 %x% A_ + C_, full_variance))),
  mxAlgebra('variance_mz', expression = filter %&% variance_mz_))

top_mr2 <- mxModel("top", BE, A, C, E, filter, LY, mean_dz, algebras)

# Preparing the objects for the multiple-group (MZ, DZ) analysis
MZ_mr2 <- mxModel("MZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_mz",
                                      means = "top.mean_mz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1", "iY_T1",
                                                   "X_T2", "Y_T2")))
DZ_mr2 <- mxModel("DZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_dz",
                                      means = "top.mean_dz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1", "iY_T1",
                                                   "X_T2", "Y_T2", "iX_T2", "iY_T2")))
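# An optional aside on the filter matrix: post-multiplying an 8-column row
# vector by t(filter) keeps only the first six entries, which is exactly what
# the mean_mz and variance_mz algebras do to drop the twin-2 PRSs.
# demo_means is a throwaway example vector, not part of the model.
demo_means <- matrix(1:8, nrow = 1)
demo_means %*% t(filter$values) # entries 7 and 8 (the twin-2 PRSs) are dropped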
# Combining objects to generate the final model
mrdoc2 <- mxModel("mrdoc2", top_mr2, MZ_mr2, DZ_mr2,
                  mxFitFunctionMultigroup(c("MZ", "DZ")))

# Simulating using mrdoc2; now we are simulating data so that there is an
# effect of X on Y and vice-versa: g1, g2 != 0
sim_data3 <- mrdoc2 |>
  omxSetParameters(labels = c("g1", "g2", "b1", "b3", "ax2", "ay2", "cx2",
                              "cy2", "ex2", "ey2", "covA", "covC", "covE",
                              "rf", "x2", "y2"),
                   values = c(0.184, 0.111, 0.411, 0.519, 0.424, 0.670, 0.670,
                              0.424, 0.519, 0.519, 0.411, 0.221, 0.221, 0.111,
                              1, 1)) |>
  mxGenerateData(nrows = 1000, empirical = TRUE)

# Remember, we need to duplicate the columns for the second twin
sim_data3$MZ <- mutate(sim_data3$MZ, iX_T2 = iX_T1, iY_T2 = iY_T1)
dg2mz <- sim_data3$MZ
dg2dz <- sim_data3$DZ

dim(dg2dz)
# head(dg2dz)
summary(dg2dz)
dim(dg2mz)
# head(dg2mz)
summary(dg2mz)

bidir <- mrdoc2
bidir$MZ <- bidir$MZ + mxData(dg2mz, type = "raw")
bidir$DZ <- bidir$DZ + mxData(dg2dz, type = "raw")
bidir <- mxAutoStart(bidir)
bidir <- mxRun(bidir)
# You can check for local identification (this is slow, do it at home):
# mxCheckIdentification(bidir)$status
summary(bidir)
# simulated parameters
# g1 = 0.184
# g2 = 0.111
# b1 = 0.411
# b3 = 0.519
# Q9: Check the results: does the MR-DoC model correctly recover the parameters
# g1, g2, b1, and b3?

# MR-DoC: Test g1 = g2 = 0 using a likelihood ratio test
bidir2 <- omxSetParameters(bidir, name = "drop_g1g2", labels = c("g1", "g2"),
                           free = FALSE, values = 0)
bidir2 <- mxRun(bidir2)
mxCompare(bidir, bidir2)
# Q10: How many degrees of freedom does this LRT have?
# Q11: Are the causal paths significant?

## Let's run in unrelateds X -> Y: ---------------------------------------------
# We are proceeding to check whether running 2SLS in each direction matches the
# true values we set in the simulation. Remember the model includes indirect
# pleiotropy of the type PS2 -> rf -> PS1 -> X (the two instruments are
# correlated through rf).
# take twin 1 from MZ and DZ
dat4 <- rbind(dg2mz[, 1:4], dg2dz[, 1:4]) |>
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

unrel7 <- IVModel + mxData(dat4, type = "raw")
# in the interest of brevity, let's autostart the model
unrel7 <- mxAutoStart(unrel7)
unrel7 <- mxRun(unrel7)
summary(unrel7)

## Let's run in unrelateds Y -> X: ---------------------------------------------
# take twin 1 from MZ and DZ
dat5 <- rbind(dg2mz[, 1:4], dg2dz[, 1:4]) |>
  rename(Y = X_T1, # notice the inversion here
         X = Y_T1, # notice the inversion here
         I = iY_T1)
# The inversion above is only necessary so we don't have to rewrite the model
# from scratch.
# Bear with me as I rename the parameters in the base model;
# this will help with interpreting the results.
IVModelYX <- omxSetParameters(IVModel, name = "IVModelYX",
                              labels = c("g1", "b1", "vX", "vY", "mX", "mY"),
                              newlabels = c("g2", "b3", "vY", "vX", "mY", "mX"))

unrel8 <- IVModelYX + mxData(dat5, type = "raw")
# in the interest of brevity, let's autostart the model
unrel8 <- mxAutoStart(unrel8)
unrel8 <- mxRun(unrel8)
summary(unrel8)
# The summary immediately above describes the relationship of Y to X,
# in other words g2.
# simulated parameters
# g1 = 0.184
# g2 = 0.111
# b1 = 0.411
# b3 = 0.519
# Q12: Do the MR-SEM models (unrel7 for X -> Y, and unrel8 for Y -> X)
# correctly recover the parameters used for simulation?
# What happened to the estimates?
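# An optional way to line up the estimates side by side (omxGetParameters()
# returns the free parameters of a fitted model as a named vector):
rbind(simulated = c(g1 = 0.184, g2 = 0.111),
      mrdoc2    = omxGetParameters(bidir)[c("g1", "g2")],
      mr_sem    = c(omxGetParameters(unrel7)["g1"],
                    omxGetParameters(unrel8)["g2"]))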
# Scenario 4: Pleiotropy & no causal effect (STRETCH GOAL!) -------------------
# We are back to MR-DoC1 and we will be simulating data without the causal
# effect.
# Compare the results obtained with MR-DoC, unrelateds SEM, 2SLS, 2-sample MR.
sim_data4 <- mrdoc1 |>
  omxSetParameters(labels = c("g1", "b1", "b2", "ax2", "ay2", "cx2", "cy2",
                              "ex2", "ey2", "covA", "covC", "covE", "sigma_x"),
                   values = c(0, 0.316, 0.316, 0.424, 0.670, 0.670, 0.424,
                              0.519, 0.519, 0.411, 0.221, 0, 1)) |>
  mxGenerateData(nrows = 1000, empirical = TRUE)

sim_data4$MZ <- mutate(sim_data4$MZ, iX_T2 = iX_T1)
dg1mz <- sim_data4$MZ
dg1dz <- sim_data4$DZ

dim(dg1dz)
# head(dg1dz)
summary(dg1dz)
dim(dg1mz)
# head(dg1mz)
summary(dg1mz)

no_causal <- mrdoc1
no_causal$MZ <- mrdoc1$MZ + mxData(dg1mz, type = "raw")
no_causal$DZ <- mrdoc1$DZ + mxData(dg1dz, type = "raw")
no_causal <- mxRun(no_causal)
summary(no_causal)
# Q13: Check the results: does the MR-DoC model correctly recover the
# parameters b2, g1, and b1?

## MR-DoC: Test g1=0 using a likelihood ratio test #########
no_causal2 <- omxSetParameters(no_causal, name = "drop_g1", labels = "g1",
                               free = FALSE, values = 0)
no_causal2 <- mxRun(no_causal2)
mxCompare(no_causal, no_causal2)
# Q14: Do we detect a causal effect when the true g1 value is set to zero?

## Let's run in unrelateds: ---------------------------------------------------
# take twin 1 from MZ and DZ
dat3 <- rbind(dg1mz[, 1:3], dg1dz[, 1:3]) |>
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

unrel5 <- IVModel + mxData(dat3, type = "raw")
# in the interest of brevity, let's autostart the model
unrel5 <- mxAutoStart(unrel5)
unrel5 <- mxRun(unrel5)
# parameters used for simulation
# g1 = 0
# b1 = 0.316
# b2 = 0.316
summary(unrel5)
# Q15: Does the MR-SEM model correctly recover the parameters used for
# simulation?

unrel6 <- omxSetParameters(unrel5, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
unrel6 <- mxRun(unrel6)
mxCompare(unrel5, unrel6)

## Fit the model using two-stage least squares ####################
TSLS1 <- lm(X ~ I, data = dat3)
Xhat  <- predict(TSLS1)
TSLS2 <- lm(Y ~ Xhat, data = dat3)
summary(TSLS2)
# The 2SLS regression again recovers the same biased estimates as MR-SEM. In
# the presence of pleiotropy, MR-DoC outperforms the other methods.
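# An optional aside on the 2-sample MR comparison mentioned in the scenario
# header: with a single instrument, a summary-statistics MR estimate reduces to
# the Wald ratio of the instrument-outcome and instrument-exposure regression
# slopes, so it inherits the same pleiotropy bias. A minimal sketch on dat3
# (one-sample data used in two-sample style, purely for illustration):
beta_zy <- coef(lm(Y ~ I, data = dat3))["I"] # instrument-outcome association
beta_zx <- coef(lm(X ~ I, data = dat3))["I"] # instrument-exposure association
beta_zy / beta_zx # Wald-ratio estimate of g1; close to b2/b1 = 1 here, even
                  # though the true g1 is 0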