# Mendelian randomization using the twin design
# Boulder Workshop 2024
# Luis Araujo
# This script is a modified version of the Minica & Neale 2018 AGES workshop

# In this script we will test the (bidirectional) causal effect between two
# phenotypes, think of BMI on SBP and vice-versa, and polygenic scores for each.
# In the interest of brevity we will always refer to BMI as variable X, and SBP
# as variable Y, with the respective instruments iX and iY.
#
# There are four main take-away messages you should focus on:
# 1. How to set up a simulation in OpenMx using mxGenerateData
# 2. That a two-stage least squares test can be specified as a SEM in OpenMx
# 3. That MR-DoC recovers the same estimates as the 2SLS test
# 4. That in the presence of horizontal pleiotropy estimates are biased
#    in the 2SLS test and unbiased in MR-DoC
#
# The script contains four scenarios:
# - no pleiotropy between the instrument and the outcome (Scenario 1),
# - where there is pleiotropy (Scenario 2),
# - presence of a bidirectional causal effect (Scenario 3),
# - absence of a causal effect (Scenario 4).
# See the presentation slides for path diagrams.
# If you are using RStudio you can navigate using the outline dropdown menu.

# Setting the stage ------------------------------------------------------------
rm(list = ls()) # Emptying the R environment; not usually recommended, but
                # useful for this workshop

# loading required packages
library(OpenMx)
library(MASS)
library(dplyr)

options(digits = 2, scipen = 999) # we don't want scientific notation
mxOption(NULL, "Default optimizer", "SLSQP")

# The models are specified in three objects: top (for the common parts), MZ and
# DZ. Spend some time recognizing the elements in the model; by now you should
# have seen similar code. Notice two minor style changes: I am naming the
# objects at the beginning, and the matrix labels are spelled out in the
# positions they will occupy in the matrix.
# The final objects (mrdoc1, mrdoc2) will be reused throughout the script.
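# An optional note before the matrix setup: the mxAlgebra section below uses
# OpenMx's quadratic-product operator %&%, where M %&% S stands for
# M %*% S %*% t(M). Since LY is fixed to an identity matrix, each variance
# component ends up pre- and post-multiplied by solve(I - BE), i.e. the usual
# reduced form of a simultaneous-equations model. A small base-R sketch of that
# calculation (reduced_form() is just an illustration and is not used later):
reduced_form <- function(B, S) {
  Binv <- solve(diag(nrow(B)) - B) # (I - B)^{-1}
  Binv %*% S %*% t(Binv)           # reduced-form (co)variance of the phenotypes
}
# e.g. with a toy 2x2 regression matrix and a toy covariance matrix:
reduced_form(B = matrix(c(0, 0.3, 0, 0), 2, 2, byrow = TRUE), S = diag(2))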
# The matrix containing regressions for causal paths and instruments
BE <- mxMatrix(name = "BE", type = "Full", nrow = 3, ncol = 3, byrow = TRUE,
               labels = c(NA,   "g2", "b1",
                          "g1", NA,   "b2",
                          NA,   NA,   NA),
               free = c(FALSE, FALSE, TRUE,
                        TRUE,  FALSE, TRUE,
                        FALSE, FALSE, FALSE),
               dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

# ACE decomposition
A <- mxMatrix(name = 'A', type = 'Symm', nrow = 3, ncol = 3, byrow = TRUE,
              labels = c("ax2",  "covA", NA,
                         "covA", "ay2",  NA,
                         NA,     NA,     "sigma_x"),
              free = c(TRUE,  TRUE,  FALSE,
                       TRUE,  TRUE,  FALSE,
                       FALSE, FALSE, TRUE),
              dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

C <- mxMatrix(name = 'C', type = 'Symm', nrow = 3, ncol = 3, byrow = TRUE,
              labels = c("cx2",  "covC", NA,
                         "covC", "cy2",  NA,
                         NA,     NA,     NA),
              free = c(TRUE,  TRUE,  FALSE,
                       TRUE,  TRUE,  FALSE,
                       FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

E <- mxMatrix(name = 'E', type = 'Symm', nrow = 3, ncol = 3, byrow = TRUE,
              labels = c("ex2",  "covE", NA,
                         "covE", "ey2",  NA,
                         NA,     NA,     NA),
              free = c(TRUE,  FALSE, FALSE,
                       FALSE, TRUE,  FALSE,
                       FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

# The filter matrix is used to remove the PRS from _T2 in MZs;
# if we kept it, that column would be redundant and the Hessian
# would not be positive definite
filter <- mxMatrix(name = 'filter', type = 'Full', nrow = 5, ncol = 6,
                   free = FALSE, byrow = TRUE,
                   values = c(1,0,0,0,0,0,
                              0,1,0,0,0,0,
                              0,0,1,0,0,0,
                              0,0,0,1,0,0,
                              0,0,0,0,1,0),
                   dimnames = list(c("X_T1", "Y_T1", "iX_T1", "X_T2", "Y_T2"),
                                   c("X_T1", "Y_T1", "iX_T1", "X_T2", "Y_T2", "iX_T2")))

# This lambda (LY) matrix fixes the loadings for the PRS and the phenotypes to 1
LY <- mxMatrix(name = 'LY', type = 'Full', nrow = 3, ncol = 3, free = FALSE,
               values = diag(3), labels = NA,
               dimnames = list(c("X", "Y", "iX"), c("X", "Y", "iX")))

# Means objects
mean_dz <- mxMatrix(name = 'mean_dz', type = 'Full', nrow = 1, ncol = 6,
                    free = TRUE, values = 0, byrow = TRUE,
                    labels = c('mX1', 'mY2', 'miX1', 'mX1', 'mY2', 'miX1'))
mean_mz <- mxAlgebra('mean_mz', expression = mean_dz %*% t(filter))

# Identity matrix for the algebras
I <- mxMatrix(name = 'I', type = 'Iden', nrow = 3, ncol = 3)

algebras <- list(
  mxAlgebra('A_', expression = LY %&% solve(I - BE) %&% A),
  mxAlgebra('C_', expression = LY %&% solve(I - BE) %&% C),
  mxAlgebra('E_', expression = LY %&% solve(I - BE) %&% E),
  mxAlgebra('SPh', expression = A_ + C_ + E_),
  mxAlgebra('variance_mz_', expression = rbind(
    cbind(SPh, A_ + C_),
    cbind(A_ + C_, SPh))),
  mxAlgebra('variance_dz', expression = rbind(
    cbind(SPh, .5 %x% A_ + C_),
    cbind(.5 %x% A_ + C_, SPh))),
  mxAlgebra('variance_mz', expression = filter %&% variance_mz_))

top_mr1 <- mxModel("top", BE, A, C, E, filter, LY, mean_dz, mean_mz, I, algebras)

# Preparing the objects for the multiple-group (MZ, DZ) analysis
MZ_mr1 <- mxModel("MZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_mz",
                                      means = "top.mean_mz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1",
                                                   "X_T2", "Y_T2")))
DZ_mr1 <- mxModel("DZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_dz",
                                      means = "top.mean_dz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1",
                                                   "X_T2", "Y_T2", "iX_T2")))

# Combining objects to generate the final model
mrdoc1 <- mxModel("mrdoc1", top_mr1, MZ_mr1, DZ_mr1,
                  mxFitFunctionMultigroup(c("MZ", "DZ")))
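# An optional quick check: listing the free parameters of the model we just
# built makes it easier to verify the labels used with omxSetParameters() in
# the simulations below (omxGetParameters() returns them as a named vector).
omxGetParameters(mrdoc1)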
# Scenario 1: no pleiotropy ---------------------------------------------------
# Generate simulated data for mrdoc1
# Fit the model in unrelateds using a structural equation model
# Fit the model in twins using MR-DoC
# Compare the NCPs of the models in unrelateds and twins
# Look at the 2-stage least squares results

# Let's generate simulated data where b2 (the pleiotropic path) is zero;
# in other words, no pleiotropy is present in the data.
# As such we need to set the true values; this is one way of doing it:
true_model <- mrdoc1 |>
  omxSetParameters(labels = c("g1", "b1", "b2", "ax2", "ay2", "cx2", "cy2",
                              "ex2", "ey2", "covA", "covC", "covE", "sigma_x"),
                   values = c(0.316, 0.316, 0, 0.424, 0.671, 0.671, 0.424,
                              0.519, 0.519, 0.411, 0.221, 0, 1)) |>
  omxSetParameters(labels = c("b2"), free = FALSE) # remember, no pleiotropy

# Simulating data according to the exact covariance matrix from mrdoc1 (above);
# this is done using the switch empirical = TRUE, and should be quick
sim_data <- mxGenerateData(true_model, nrows = 1000, empirical = TRUE)

# The data does not come with the instrument column for the second twin, so we
# have to duplicate the column. In your own analysis this step will not be
# needed; we are doing it because of how I coded the simulation for this
# workshop.
sim_data$MZ <- mutate(sim_data$MZ, iX_T2 = iX_T1)
dnpmz <- sim_data$MZ
dnpdz <- sim_data$DZ

dim(dnpdz)
# head(dnpdz)
summary(dnpdz)
dim(dnpmz)
# head(dnpmz)
summary(dnpmz)

# We now add the data to the model object created before.
# Notice that the plus sign is overloaded in OpenMx as it is, for example,
# in ggplot2. So you can combine objects with the syntax below instead of
# `model = mxModel(model, mxData(data, type = "raw"))`.
# This syntax is optional, but it helps reduce the length of the script.
mrdoc1$MZ <- mrdoc1$MZ + mxData(dnpmz, type = "raw")
mrdoc1$DZ <- mrdoc1$DZ + mxData(dnpdz, type = "raw")

m1 <- mrdoc1 |>
  # Remember, no b2 in the data, no b2 in the model
  omxSetParameters(labels = "b2", free = FALSE) |>
  # The point of examining the data a few lines above is to set sensible
  # starting values for the model. Here, in the interest of brevity, we ask
  # OpenMx to pick starting values for us.
  mxAutoStart()
m1 <- mxRun(m1)

# In your analyses, make sure to test for local identification frequently:
# mxCheckIdentification(m1)$status # Model is locally identified
# [1] TRUE
summary(m1)
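# An optional sanity check on the simulation: because mxGenerateData() was
# called with empirical = TRUE, the sample covariance of the simulated DZ data
# should closely match the expected covariance of the fitted model. (This
# assumes your OpenMx version exposes mxGetExpected() with a subname argument;
# see ?mxGetExpected.)
round(cov(dnpdz), 2)
round(mxGetExpected(m1, "covariance", subname = "DZ"), 2)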
# One way of assessing whether the causal path is significant is by dropping it
m2 <- omxSetParameters(m1, name = "nog1", labels = "g1", free = FALSE, values = 0)
m2 <- mxRun(m2)

# Comparing the two models
mxCompare(m1, m2)
# Q1: We dropped the causal path; what is the interpretation of the above
# result?

# Can we specify a model for a 2-stage least squares test using SEM and OpenMx?
IVModel <- mxModel("MR", type = "RAM",
                   manifestVars = c("X", "Y", "I"),
                   latentVars = c("eX", "eY"),
                   # Path from instrument to exposure
                   mxPath(from = "I", to = "X", arrows = 1, labels = "b1"),
                   # Path from exposure to outcome, g1
                   mxPath(from = "X", to = "Y", labels = "g1"),
                   # Variance of the instrument
                   mxPath(from = c("I"), arrows = 2, labels = "vI"),
                   # Latent error terms loading on the observed variables (fixed to 1)
                   mxPath(from = c("eX", "eY"), to = c("X", "Y"),
                          values = 1, free = FALSE),
                   # Variance of the residual errors
                   mxPath(from = c("eX", "eY"), arrows = 2, free = TRUE,
                          labels = c("vX", "vY")),
                   # Correlation among the residuals
                   mxPath(connect = "unique.bivariate", from = c("eX", "eY"),
                          arrows = 2, values = 0.2, labels = "re"),
                   # Means
                   mxPath("one", to = c("X", "Y", "I"),
                          labels = c("mX", "mY", "mI")))

# The above specification closely follows the slides in the presentation.
# If you have umx installed you can look at it:
# library(umx)
# plot(IVModel)

# Let's generate a new data set by taking only twin 1 from the MZs and DZs
dat1 <- rbind(dnpmz[, 1:3], dnpdz[, 1:3]) |>
  # we need to rename the variables to match the dimension names set in the
  # IVModel above
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

# Adding the data to the model
unrel <- IVModel + mxData(dat1, type = "raw")
# Know your data: check variable skewness, kurtosis and means; but
# in the interest of brevity, let's autostart
unrel <- mxAutoStart(unrel)
unrel <- mxRun(unrel)
summary(unrel)

unrel2 <- omxSetParameters(unrel, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
unrel2 <- mxRun(unrel2)
mxCompare(unrel, unrel2)
# Q2: The line above is a likelihood ratio test with 1 degree of freedom; what
# is the meaning of a significant p-value in this case?

# Now let's compare with a typical 2SLS test
TSLS1 <- lm(X ~ I, data = dat1)
Xhat  <- predict(TSLS1)
TSLS2 <- lm(Y ~ Xhat, data = dat1)
summary(TSLS2)
# In the specialized ivreg package the syntax would be:
# library(ivreg)
# TSLS2 <- ivreg(Y ~ X | I, data = dat1)

# Q3: Check out the estimate for Xhat in the previous summary and compare it to
# the unrel estimate. Did MR-DoC estimate the same values as 2SLS?

# Now let's check the power to reject the hypothesis g1 = 0 using the
# non-centrality parameter for related and unrelated individuals.
lambdam1 <- mxCompare(m1, m2)[2, 7]
dfs      <- mxCompare(m1, m2)[2, 8]
alpha    <- 0.05
ca       <- qchisq(alpha, dfs, ncp = 0, lower.tail = FALSE)
powerm1  <- pchisq(ca, dfs, ncp = lambdam1, lower.tail = FALSE)
powerm1

lambdaunrel <- mxCompare(unrel, unrel2)[2, 7]
dfs         <- mxCompare(unrel, unrel2)[2, 8]
alpha       <- 0.05 # user specified: type I error probability
ca          <- qchisq(alpha, dfs, ncp = 0, lower.tail = FALSE)
powerUnrel  <- pchisq(ca, dfs, ncp = lambdaunrel, lower.tail = FALSE)
powerUnrel
# Q4: Which method had better power to detect the causal effect?
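# An optional extension of the power calculation above: because the NCP of a
# likelihood ratio test scales (approximately) linearly with sample size, it
# can be inverted to ask how many twin pairs would be needed to reach, say,
# 80% power. ncp_for_power() below is a small helper written for this aside,
# not an OpenMx function.
ncp_for_power <- function(target = 0.80, df = 1, alpha = 0.05) {
  crit <- qchisq(alpha, df, lower.tail = FALSE)
  uniroot(function(ncp) pchisq(crit, df, ncp = ncp, lower.tail = FALSE) - target,
          interval = c(0, 100))$root
}
# Approximate number of twin pairs needed for 80% power in the MR-DoC test,
# scaling from the number of pairs we actually simulated:
(nrow(dnpmz) + nrow(dnpdz)) * ncp_for_power() / lambdam1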
# Scenario 2: Pleiotropy -------------------------------------------------
# Next consider the scenario with pleiotropy; assume re = 0 and test g1 = 0.
# Check the results: does the MR-DoC model correctly recover the parameters
# b2, g1, and b1?
# Do we detect a causal effect if we don't account for pleiotropy
# (SEM, 2-stage least squares, 2-sample MR)?

sim_data2 <- mrdoc1 |>
  omxSetParameters(labels = c("g1", "b1", "b2", "ax2", "ay2", "cx2", "cy2",
                              "ex2", "ey2", "covA", "covC", "covE", "sigma_x"),
                   values = c(0.143, 0.316, 0.127, 0.424, 0.670, 0.670, 0.424,
                              0.519, 0.519, 0.411, 0.221, 0, 1)) |>
  mxGenerateData(nrows = 1000, empirical = TRUE)

sim_data2$MZ <- mutate(sim_data2$MZ, iX_T2 = iX_T1)
dwpmz <- sim_data2$MZ
dwpdz <- sim_data2$DZ

dim(dwpdz)
# head(dwpdz)
summary(dwpdz)
dim(dwpmz)
# head(dwpmz)
summary(dwpmz)

pleio <- mrdoc1
pleio$MZ <- mrdoc1$MZ + mxData(dwpmz, type = "raw")
pleio$DZ <- mrdoc1$DZ + mxData(dwpdz, type = "raw")
pleio <- mxRun(pleio)
summary(pleio)
# parameters used for simulation
# g1 = 0.143
# b1 = 0.316
# b2 = 0.127
# Q5: Check the results: does the MR-DoC model correctly recover the parameters
# b2, g1, and b1? Hint: look at the true values used for simulation.

## MR-DoC: Test g1=0 using a likelihood ratio test #########
pleio2 <- omxSetParameters(pleio, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
pleio2 <- mxRun(pleio2)
mxCompare(pleio, pleio2)

## Let's run in unrelateds: ---------------------------------------------------
# take twin 1 from MZ and DZ
dat2 <- rbind(dwpmz[, 1:3], dwpdz[, 1:3]) |>
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

unrel3 <- IVModel + mxData(dat2, type = "raw")
# in the interest of brevity, let's autostart the model
unrel3 <- mxAutoStart(unrel3)
unrel3 <- mxRun(unrel3)
summary(unrel3)
# parameters used for simulation
# g1 = 0.143
# b1 = 0.316
# b2 = 0.127
# Q6: Does the 2SLS model correctly recover the parameters used for simulation?

unrel4 <- omxSetParameters(unrel3, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
unrel4 <- mxRun(unrel4)
mxCompare(unrel3, unrel4)

## Which model has the largest power? (look at the chi-square difference) -----
## MR-DoC in twins
chisq_Twins <- mxCompare(pleio, pleio2)[2, 7]
chisq_Twins
## MR-SEM in unrelateds
chisq_Unrel <- mxCompare(unrel3, unrel4)[2, 7]
chisq_Unrel
# Q7: Conclusion? The larger the difference, the higher the power.

## Fit the model using two-stage least squares ####################
TSLS1 <- lm(X ~ I, data = dat2)
Xhat  <- predict(TSLS1)
TSLS2 <- lm(Y ~ Xhat, data = dat2)
summary(TSLS2)
# Check above that the 2SLS test, using either lm or SEM, still finds the same
# estimates.
# Q8: Do we detect a causal effect if we don't account for pleiotropy (SEM,
# 2-stage least squares)?
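# An optional note on the size of the bias: with a pleiotropic path b2, the
# probability limit of the 2SLS / MR-SEM estimate is not g1 but roughly
# g1 + b2 / b1, because the exclusion restriction is violated. With the
# simulated values:
0.143 + 0.127 / 0.316 # ~0.55, about what the biased estimates above show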
# Scenario 3. Bidirectional causation -------------------------------------------
# Specifying the mrdoc2 model. This is similar to mrdoc1 except with larger
# matrices (one more instrument).

# Matrix for causal paths
BE <- mxMatrix(name = "BE", type = "Full", nrow = 4, ncol = 4, byrow = TRUE,
               labels = c(NA,   "g2", "b1", "b4",
                          "g1", NA,   "b2", "b3",
                          NA,   NA,   NA,   NA,
                          NA,   NA,   NA,   NA),
               free = c(FALSE, TRUE,  TRUE,  FALSE,
                        TRUE,  FALSE, FALSE, TRUE,
                        FALSE, FALSE, FALSE, FALSE,
                        FALSE, FALSE, FALSE, FALSE),
               dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

# A, C and E decomposition
A <- mxMatrix(name = 'A', type = 'Symm', nrow = 4, ncol = 4, byrow = TRUE,
              labels = c("ax2",  "covA", NA,   NA,
                         "covA", "ay2",  NA,   NA,
                         NA,     NA,     "x2", "rf",
                         NA,     NA,     "rf", "y2"),
              free = c(TRUE,  TRUE,  FALSE, FALSE,
                       TRUE,  TRUE,  FALSE, FALSE,
                       FALSE, FALSE, TRUE,  TRUE,
                       FALSE, FALSE, TRUE,  TRUE),
              dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

C <- mxMatrix(name = 'C', type = 'Symm', nrow = 4, ncol = 4, byrow = TRUE,
              labels = c("cx2",  "covC", NA, NA,
                         "covC", "cy2",  NA, NA,
                         NA,     NA,     NA, NA,
                         NA,     NA,     NA, NA),
              free = c(TRUE,  TRUE,  FALSE, FALSE,
                       TRUE,  TRUE,  FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

E <- mxMatrix(name = 'E', type = 'Symm', nrow = 4, ncol = 4, byrow = TRUE,
              labels = c("ex2",  "covE", NA, NA,
                         "covE", "ey2",  NA, NA,
                         NA,     NA,     NA, NA,
                         NA,     NA,     NA, NA),
              free = c(TRUE,  TRUE,  FALSE, FALSE,
                       TRUE,  TRUE,  FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE,
                       FALSE, FALSE, FALSE, FALSE),
              dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

# A filter matrix, as we need to remove the PRSs from twin 2
filter <- mxMatrix(name = 'filter', type = 'Full', nrow = 6, ncol = 8,
                   free = FALSE, byrow = TRUE,
                   values = c(1,0,0,0,0,0,0,0,
                              0,1,0,0,0,0,0,0,
                              0,0,1,0,0,0,0,0,
                              0,0,0,1,0,0,0,0,
                              0,0,0,0,1,0,0,0,
                              0,0,0,0,0,1,0,0),
                   dimnames = list(c("X_T1", "Y_T1", "iX_T1", "iY_T1", "X_T2", "Y_T2"),
                                   c("X_T1", "Y_T1", "iX_T1", "iY_T1",
                                     "X_T2", "Y_T2", "iX_T2", "iY_T2")))

LY <- mxMatrix(name = 'LY', type = 'Full', nrow = 4, ncol = 4, free = FALSE,
               values = diag(4), labels = NA,
               dimnames = list(c("X", "Y", "iX", "iY"), c("X", "Y", "iX", "iY")))

# The object with the means
mean_dz <- mxMatrix(name = 'mean_dz', type = 'Full', nrow = 1, ncol = 8,
                    free = TRUE, byrow = TRUE, values = 0,
                    labels = c('mX1', 'mY2', 'miX1', 'miY2',
                               'mX1', 'mY2', 'miX1', 'miY2'))

algebras <- list(
  # Removing the PRS from the MZ means
  mxAlgebra('mean_mz', expression = mean_dz %*% t(filter)),
  # Identity matrix for the algebra calculations
  mxMatrix(name = 'I', type = 'Iden', nrow = 4, ncol = 4),
  # The matrices needed for calculating the variances
  mxAlgebra('A_', expression = LY %&% solve(I - BE) %&% A),
  mxAlgebra('C_', expression = LY %&% solve(I - BE) %&% C),
  mxAlgebra('E_', expression = LY %&% solve(I - BE) %&% E),
  mxAlgebra('full_variance', expression = A_ + C_ + E_),
  mxAlgebra('variance_mz_', expression = rbind(
    cbind(full_variance, A_ + C_),
    cbind(A_ + C_, full_variance))),
  mxAlgebra('variance_dz', expression = rbind(
    cbind(full_variance, 0.5 %x% A_ + C_),
    cbind(0.5 %x% A_ + C_, full_variance))),
  mxAlgebra('variance_mz', expression = filter %&% variance_mz_))

top_mr2 <- mxModel("top", BE, A, C, E, filter, LY, mean_dz, algebras)

# Preparing the objects for the multiple-group (MZ, DZ) analysis
MZ_mr2 <- mxModel("MZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_mz",
                                      means = "top.mean_mz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1", "iY_T1",
                                                   "X_T2", "Y_T2")))
DZ_mr2 <- mxModel("DZ", mxFitFunctionML(),
                  mxExpectationNormal(covariance = "top.variance_dz",
                                      means = "top.mean_dz",
                                      dimnames = c("X_T1", "Y_T1", "iX_T1", "iY_T1",
                                                   "X_T2", "Y_T2", "iX_T2", "iY_T2")))
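# An optional aside on the filter matrix: post-multiplying an 8-column row
# vector by t(filter) keeps only the first six entries, which is exactly what
# the mean_mz and variance_mz algebras do to drop the twin-2 PRSs.
# demo_means is a throwaway example vector, not part of the model.
demo_means <- matrix(1:8, nrow = 1)
demo_means %*% t(filter$values) # entries 7 and 8 (the twin-2 PRSs) are dropped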
# Combining objects to generate the final model
mrdoc2 <- mxModel("mrdoc2", top_mr2, MZ_mr2, DZ_mr2,
                  mxFitFunctionMultigroup(c("MZ", "DZ")))

# Simulating using mrdoc2; now we are simulating data so that there is an
# effect of X on Y and vice-versa: g1, g2 != 0
sim_data3 <- mrdoc2 |>
  omxSetParameters(labels = c("g1", "g2", "b1", "b3", "ax2", "ay2", "cx2",
                              "cy2", "ex2", "ey2", "covA", "covC", "covE",
                              "rf", "x2", "y2"),
                   values = c(0.184, 0.111, 0.411, 0.519, 0.424, 0.670, 0.670,
                              0.424, 0.519, 0.519, 0.411, 0.221, 0.221, 0.111,
                              1, 1)) |>
  mxGenerateData(nrows = 1000, empirical = TRUE)

# Remember, we need to duplicate the columns for the second twin
sim_data3$MZ <- mutate(sim_data3$MZ, iX_T2 = iX_T1, iY_T2 = iY_T1)
dg2mz <- sim_data3$MZ
dg2dz <- sim_data3$DZ

dim(dg2dz)
# head(dg2dz)
summary(dg2dz)
dim(dg2mz)
# head(dg2mz)
summary(dg2mz)

bidir <- mrdoc2
bidir$MZ <- bidir$MZ + mxData(dg2mz, type = "raw")
bidir$DZ <- bidir$DZ + mxData(dg2dz, type = "raw")
bidir <- mxAutoStart(bidir)
bidir <- mxRun(bidir)
# You can check for local identification (this is slow, do it at home):
# mxCheckIdentification(bidir)$status
summary(bidir)
# simulated parameters
# g1 = 0.184
# g2 = 0.111
# b1 = 0.411
# b3 = 0.519
# Q9: Check the results: does the MR-DoC model correctly recover the parameters
# g1, g2, b1, and b3?

# MR-DoC: Test g1 = g2 = 0 using a likelihood ratio test
bidir2 <- omxSetParameters(bidir, name = "drop_g1g2", labels = c("g1", "g2"),
                           free = FALSE, values = 0)
bidir2 <- mxRun(bidir2)
mxCompare(bidir, bidir2)
# Q10: How many degrees of freedom does this LRT have?
# Q11: Are the causal paths significant?

## Let's run in unrelateds X -> Y: ---------------------------------------------
# We are proceeding to check whether running 2SLS in each direction matches the
# true values we set in the simulation. Remember the model includes indirect
# pleiotropy of the type PS2 -> rf -> PS1 -> X (the two instruments are
# correlated through rf).
# take twin 1 from MZ and DZ
dat4 <- rbind(dg2mz[, 1:4], dg2dz[, 1:4]) |>
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

unrel7 <- IVModel + mxData(dat4, type = "raw")
# in the interest of brevity, let's autostart the model
unrel7 <- mxAutoStart(unrel7)
unrel7 <- mxRun(unrel7)
summary(unrel7)

## Let's run in unrelateds Y -> X: ---------------------------------------------
# take twin 1 from MZ and DZ
dat5 <- rbind(dg2mz[, 1:4], dg2dz[, 1:4]) |>
  rename(Y = X_T1, # notice the inversion here
         X = Y_T1, # notice the inversion here
         I = iY_T1)
# The inversion above is only necessary so we don't have to rewrite the model
# from scratch.
# Bear with me as I rename the parameters in the base model;
# this will help with interpreting the results.
IVModelYX <- omxSetParameters(IVModel, name = "IVModelYX",
                              labels = c("g1", "b1", "vX", "vY", "mX", "mY"),
                              newlabels = c("g2", "b3", "vY", "vX", "mY", "mX"))

unrel8 <- IVModelYX + mxData(dat5, type = "raw")
# in the interest of brevity, let's autostart the model
unrel8 <- mxAutoStart(unrel8)
unrel8 <- mxRun(unrel8)
summary(unrel8)
# The summary immediately above describes the relationship of Y to X,
# in other words g2.
# simulated parameters
# g1 = 0.184
# g2 = 0.111
# b1 = 0.411
# b3 = 0.519
# Q12: Do the MR-SEM models (unrel7 for X -> Y, and unrel8 for Y -> X)
# correctly recover the parameters used for simulation?
# What happened to the estimates?
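# An optional way to line up the estimates side by side (omxGetParameters()
# returns the free parameters of a fitted model as a named vector):
rbind(simulated = c(g1 = 0.184, g2 = 0.111),
      mrdoc2    = omxGetParameters(bidir)[c("g1", "g2")],
      mr_sem    = c(omxGetParameters(unrel7)["g1"],
                    omxGetParameters(unrel8)["g2"]))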
# Scenario 4: Pleiotropy & no causal effect (STRETCH GOAL!) -------------------
# We are back to MR-DoC1 and we will be simulating data without the causal
# effect.
# Compare the results obtained with MR-DoC, unrelateds SEM, 2SLS, 2-sample MR.
sim_data4 <- mrdoc1 |>
  omxSetParameters(labels = c("g1", "b1", "b2", "ax2", "ay2", "cx2", "cy2",
                              "ex2", "ey2", "covA", "covC", "covE", "sigma_x"),
                   values = c(0, 0.316, 0.316, 0.424, 0.670, 0.670, 0.424,
                              0.519, 0.519, 0.411, 0.221, 0, 1)) |>
  mxGenerateData(nrows = 1000, empirical = TRUE)

sim_data4$MZ <- mutate(sim_data4$MZ, iX_T2 = iX_T1)
dg1mz <- sim_data4$MZ
dg1dz <- sim_data4$DZ

dim(dg1dz)
# head(dg1dz)
summary(dg1dz)
dim(dg1mz)
# head(dg1mz)
summary(dg1mz)

no_causal <- mrdoc1
no_causal$MZ <- mrdoc1$MZ + mxData(dg1mz, type = "raw")
no_causal$DZ <- mrdoc1$DZ + mxData(dg1dz, type = "raw")
no_causal <- mxRun(no_causal)
summary(no_causal)
# Q13: Check the results: does the MR-DoC model correctly recover the
# parameters b2, g1, and b1?

## MR-DoC: Test g1=0 using a likelihood ratio test #########
no_causal2 <- omxSetParameters(no_causal, name = "drop_g1", labels = "g1",
                               free = FALSE, values = 0)
no_causal2 <- mxRun(no_causal2)
mxCompare(no_causal, no_causal2)
# Q14: Do we detect a causal effect when the true g1 value is set to zero?

## Let's run in unrelateds: ---------------------------------------------------
# take twin 1 from MZ and DZ
dat3 <- rbind(dg1mz[, 1:3], dg1dz[, 1:3]) |>
  rename(X = X_T1, Y = Y_T1, I = iX_T1)

unrel5 <- IVModel + mxData(dat3, type = "raw")
# in the interest of brevity, let's autostart the model
unrel5 <- mxAutoStart(unrel5)
unrel5 <- mxRun(unrel5)
# parameters used for simulation
# g1 = 0
# b1 = 0.316
# b2 = 0.316
summary(unrel5)
# Q15: Does the MR-SEM model correctly recover the parameters used for
# simulation?

unrel6 <- omxSetParameters(unrel5, name = "no_causal", labels = "g1",
                           free = FALSE, values = 0)
unrel6 <- mxRun(unrel6)
mxCompare(unrel5, unrel6)

## Fit the model using two-stage least squares ####################
TSLS1 <- lm(X ~ I, data = dat3)
Xhat  <- predict(TSLS1)
TSLS2 <- lm(Y ~ Xhat, data = dat3)
summary(TSLS2)
# The 2SLS regression again recovers the same biased estimates as MR-SEM. In
# the presence of pleiotropy, MR-DoC outperforms the other methods.
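# An optional aside on the 2-sample MR comparison mentioned in the scenario
# header: with a single instrument, a summary-statistics MR estimate reduces to
# the Wald ratio of the instrument-outcome and instrument-exposure regression
# slopes, so it inherits the same pleiotropy bias. A minimal sketch on dat3
# (one-sample data used in two-sample style, purely for illustration):
beta_zy <- coef(lm(Y ~ I, data = dat3))["I"] # instrument-outcome association
beta_zx <- coef(lm(X ~ I, data = dat3))["I"] # instrument-exposure association
beta_zy / beta_zx # Wald-ratio estimate of g1; close to b2/b1 = 1 here, even
                  # though the true g1 is 0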