# Estimating Vertical Transmission and Genetic Nurture using Polygenic Scores #
# Jared V. Balbona, Yongkang Kim, and Matthew C. Keller #

# If you have any questions, please feel free to email Jared and/ or Matthew at jared.v.balbona@gmail.com and matthew.c.keller@gmail.com
# And for further information, please also check out our original paper (doi.org/10.1007/s10519-020-10032-w) and the OpenMx website (https://openmx.ssri.psu.edu/).

# Thank you for following our demonstration!

# QUALTRICS LINK: https://qimr.az1.qualtrics.com/jfe/form/SV_cO294hjwW6Ik8Cy # 

############################################################################################
## KEY ##
############################################################################################
# Our dataset contains:
    # Yo:  The offspring's phenotypic value
    # Yp:  The father's phenotypic value
    # Ym:  The mother's phenotypic value
    # Tp:  A PGS made from the TRANSMITTED portion of the PATERNAL genotype
    # Tm:  A PGS made from the TRANSMITTED portion of the MATERNAL genotype
    # NTp: A PGS made from the NON-TRANSMITTED portion of the PATERNAL genotype
    # NTm: A PGS made from the NON-TRANSMITTED portion of the MATERNAL genotype

# Our model estimates:
    # VY: Phenotypic variance
    # VA: Variance due to additive
    # VF: Variance due to parental effects/ vertical transmissino
    # VE: Variance due to the offspring's unique environment
    # w:  Genetic Nurture (i.e., the passive G-E covariance due to vertical transmission)
    # f:  The direct impact of the parental phenotype on the offspring phenotype (i.e., vertical transmission)
    # g:  The increase in haplotypic (co)variances due to assortative mating
    # mu: The assortative mating co-path coefficient, equal to (Spousal Phen Cov) / (Spousal Phen Variance)^2
    # k:  The haplotypic PGS variance under no AM (a constant that is not estimated)
    # delta: The direct effect of the PGS on the phenotype
    # Omega: The covariance between a parent's phenotype and either of their haplotypic PGS's (e.g., cov(Ym, NTm))
    # h2: The proportion of phenotypic variance due to additive genetic factors (i.e., narrow-sense h2)
    # ll: The log-likelihood for our model. It essentially measures how much unexplained variation there is in our model, with higher values indicating less accuracy.

############################################################################################
# STEP O: Load data, specify options, and look at the OBSERVED variance/ covariance matrix #
############################################################################################
library(OpenMx)
library(data.table)
library(stringr)

rm(list=ls()) # Clear any output from previous scripts
setwd("path/to/your/directory")

# Specify Options:
    mxOption(NULL,"Calculate Hessian","Yes")
    mxOption(NULL,"Standard Errors","Yes")
    mxOption(NULL,"Default optimizer","NPSOL")

# Load the simulated data for this demonstration:
    Example_Data  = fread("Example_Data.txt", header = T)
    str(Example_Data)

# Make sure the column names are correctly formatted (including capitalization)
    colnames(Example_Data) == c("Yo", "Yp", "Ym", "Tp", "NTp", "Tm", "NTm")

# Look at the full observed variance/ covariance matrix:
    round(cov(Example_Data),3)

############################################################################################
# STEP 1: Create Variables and their Constraints 
############################################################################################
    
# Create Variables:
    
    # Variance Components:
    VY    = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="VY1", name="VY") # Phenotypic variance
    VF    = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="VF1", name="VF") # Variance due to VT
    VE    = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="VE1", name="VE") # Residual variance
    
    # Genetic effects:
    delta = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="delta1",name="delta") # Effect of PGS on phen
    k     = mxMatrix(type="Full", nrow=1, ncol=1, free=F, values=.5, label="k1",    name="k")     # PGS variance (if no AM)
    Omega = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="Omega1",name="Omega") # Within-person PGS-Phen covariance
    
    # Assortative mating effects:
    mu    = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="mu1", name="mu") # AM co-path coefficient
    g     = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="g1",  name="g")  # Increase in PGS (co)variances from AM
    
    # Vertical transmission effects (note that VF above is also due to VT):
    f     = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="f1",  name="f") # Vertical Transmission effect
    
# Provide the algebraic expectations for our terms (derived via path tracing):
    
    # Within-person covariances:
    w = mxAlgebra(2 * f * Omega * (1 + VY*mu), name="w") # Genetic Nurture
    Omega_Algebra <- mxAlgebra(2 * delta * g + delta * k + .5 * w, name="Omega_Algebra") # cov(Yp, NTp)
    
    # Between-spouse genetic covariance: 
    g_Algebra = mxAlgebra(Omega * mu * Omega, name="g_Algebra") # Note: This definition of g assumes equilibrium
    
    # Algebraic variance components:
    VY_Algebra = mxAlgebra(2 * Omega * delta + delta * w + VF + VE, name="VY_Algebra")
    VF_Algebra = mxAlgebra(2 * f^2 * VY * (1 + VY * mu),            name="VF_Algebra")
    
    # Covariances between the offspring phenotype and parental PGS's:
    Yo_NTp = mxAlgebra(2 * delta * g + .5 * w, name="Yo_NTp")
    Yo_NTm = mxAlgebra(2 * delta * g + .5 * w, name="Yo_NTm")
    Yo_Tp  = mxAlgebra(Yo_NTp + delta * k,     name="Yo_Tp")
    Yo_Tm  = mxAlgebra(Yo_NTm + delta * k,     name="Yo_Tm")
    
    # Covariances between the offspring phenotype and parental phenotypes:
    Yo_Ym = mxAlgebra(delta * Omega + delta * Omega * mu * VY + f * VY + f * VY * mu * VY, name="Yo_Ym")
    Yo_Yp = mxAlgebra(delta * Omega + delta * Omega * mu * VY + f * VY + f * VY * mu * VY, name="Yo_Yp")
    
    # Between-spouse covariances:
    Yp_PGSm = mxAlgebra(VY * mu * Omega, name="Yp_PGSm")
    Ym_PGSp = mxAlgebra(VY * mu * Omega, name="Ym_PGSp")
    Ym_Yp   = mxAlgebra(VY * mu * VY,    name="Ym_Yp")
    
# Use constraints to relate the variables to the algebra, and to test assumptions:  
    
    # Set the variables created above to be equal to their algebraic constraints:
    VY_Constraint    = mxConstraint(VY == VY_Algebra,       name='VY_Constraint')
    VF_Constraint    = mxConstraint(VF == VF_Algebra,       name='VF_Constraint')
    g_Constraint     = mxConstraint(g  == g_Algebra,        name='g_Constraint')
    Omega_Constraint = mxConstraint(Omega == Omega_Algebra, name='Omega_Constraint')
    
    # Constraints for testing different model assumptions:
    No_VT <- mxConstraint(f==0,  name="No_VT") # Inclusion of this constraint will result in a model that assumes no VT
    No_AM <- mxConstraint(g==0,  name="No_AM") # Inclusion of this constraint will result in a model that assumes no AM
    
############################################################################################
# STEP 2: Relate our algebraic expectations to the observed data and create the model 
############################################################################################

# Expected covariances between each variable:
    CovMatrix =    mxAlgebra(rbind(
    #       Yo       Yp       Ym       Tp        NTp      Tm         NTm
    cbind(  VY      ,Yo_Yp    ,Yo_Ym    ,Yo_Tp	  ,Yo_NTp   ,Yo_Tm     ,Yo_NTm  ),    #Yo
    cbind(  Yo_Yp   ,VY       ,Ym_Yp    ,Omega    ,Omega    ,Yp_PGSm   ,Yp_PGSm ),    #Yp
    cbind(  Yo_Ym   ,Ym_Yp    ,VY       ,Ym_PGSp  ,Ym_PGSp  ,Omega     ,Omega   ),    #Ym
    cbind(  Yo_Tp   ,Omega    ,Ym_PGSp  ,k+g      ,g        ,g         ,g       ),    #Tp
    cbind(  Yo_NTp  ,Omega    ,Ym_PGSp  ,g        ,k+g      ,g         ,g       ),    #NTp
    cbind(  Yo_Tm   ,Yp_PGSm  ,Omega    ,g        ,g        ,k+g       ,g       ),    #Tm
    cbind(  Yo_NTm  ,Yp_PGSm  ,Omega    ,g        ,g        ,g         ,k+g     )),   #NTm
    dimnames=list(colnames(Example_Data),colnames(Example_Data)),name="expCov")

# Expected means for each variable:
    Means =   mxMatrix(type="Full", nrow=1, ncol=7, free=TRUE, values= .1, 
                        label=c("meanYo","meanYp","meanYm","meanTp","meanNTp","meanTm","meanNTm"), 
                        dimnames=list(NULL, c("Yo","Yp","Ym","Tp","NTp","Tm","NTm")), name="expMean")

# Connect the variable means and covariances with one another:
    ModelExpectations = mxExpectationNormal(covariance="expCov",means="expMean", 
                                             dimnames=c("Yo","Yp","Ym","Tp","NTp","Tm", "NTm"))

# Convert data into a usable format for OpenMx:
    Example_Data_Mx = mxData(observed=Example_Data, type="raw" )

# Create fit function:
    FitFunctionML = mxFitFunctionML()

############################################################################################
# STEP 3: Run the model iteratively 
############################################################################################
    
    Start.Val.List <- VT.AM.Result <- VT.NoAM.Result <- NoVT.AM.Result <- NoVT.NoAM.Result <- data.frame()
    i = 1 # Used for indexing below
    
    for(f.Start.Val in c(0, .4)){
      for(VE.Start.Val in c(.2, .5, .8)){
        for(delta.Start.Val in c(0, .5)){
          
          # Apply the starting values for this iteration:
          Start.Val.List = rbind(Start.Val.List, data.frame(f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val))
          
          VE    <- mxMatrix(type="Full", nrow=1	,ncol=1, free=TRUE, values=VE.Start.Val,    label="VE1",   name="VE")
          delta <- mxMatrix(type="Full", nrow=1	,ncol=1, free=TRUE, values=delta.Start.Val, label="delta1",name="delta")
          f     <- mxMatrix(type="Full", nrow=1	,ncol=1, free=TRUE, values=f.Start.Val,     label="f1",    name="f")
          
          # Specify what parameters we're going to be including in our model:
          Params <- list( VY, VY_Algebra, VY_Constraint, VF, VF_Algebra, VF_Constraint, VE,    # Variance Components
                          Omega, Omega_Algebra, Omega_Constraint, delta, k,                    # Genetic Effects
                          mu, g, g_Algebra, g_Constraint,                                                 # AM Effects
                          f, w,                                                                # VT Effects
                          Yo_Yp, Yo_Ym, Yo_Tp, Yo_Tm, Yo_NTp, Yo_NTm, Yp_PGSm, Ym_PGSp, Ym_Yp, # Relative Covariances
                          FitFunctionML, Means, ModelExpectations, CovMatrix)                  # Model Parameters
          
          # Construct each model based on their different assumptions; to keep track, name them based on the iteration number
          assign(paste0("VT.AM.It",i),    mxModel("VT_AM",    Params, Example_Data_Mx                )) # VT and g both freely estimable
          assign(paste0("VT.NoAM.It",i),  mxModel("VT_NoAM",  Params, Example_Data_Mx, No_AM         )) # Assumes g = Omega^2 mu
          assign(paste0("NoVT.AM.It",i),  mxModel("NoVT_AM",  Params, Example_Data_Mx, No_VT         )) # Assumes No VT
          assign(paste0("NoVT.NoAM.It",i),mxModel("NoVT_NoAM",Params, Example_Data_Mx, No_VT, No_AM  )) # Assumes No VT and g = Omega^2 mu
          
          # Fit each of those different models to our data-- Again, we'll give the output a name that includes the iteration number.
          assign(paste0("VT.AM.It",i,".Fit"),     try(mxRun(eval(parse(text=paste0("VT.AM.It",i))),     intervals=F, silent=T), silent=T))
          assign(paste0("VT.NoAM.It",i,".Fit"),   try(mxRun(eval(parse(text=paste0("VT.NoAM.It",i))),   intervals=F, silent=T), silent=T))
          assign(paste0("NoVT.AM.It",i,".Fit"),   try(mxRun(eval(parse(text=paste0("NoVT.AM.It",i))),   intervals=F, silent=T), silent=T))
          assign(paste0("NoVT.NoAM.It",i,".Fit"), try(mxRun(eval(parse(text=paste0("NoVT.NoAM.It",i))), intervals=F, silent=T), silent=T))
          
          # If the model ran without errors in this iteration, then record its estimated log likelihood; if not, then assign NA
          VT.AM.ll     <- ifelse(class(eval(parse(text=paste0("VT.AM.It",    i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("VT.AM.It",     i, ".Fit"))))$Minus2LogLikelihood, NA)
          VT.NoAM.ll   <- ifelse(class(eval(parse(text=paste0("VT.NoAM.It",  i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("VT.NoAM.It",   i, ".Fit"))))$Minus2LogLikelihood, NA)
          NoVT.AM.ll   <- ifelse(class(eval(parse(text=paste0("NoVT.AM.It",  i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("NoVT.AM.It",   i, ".Fit"))))$Minus2LogLikelihood, NA)
          NoVT.NoAM.ll <- ifelse(class(eval(parse(text=paste0("NoVT.NoAM.It",i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("NoVT.NoAM.It", i, ".Fit"))))$Minus2LogLikelihood, NA)
          
          # Record the iteration number, log likelihood, and start values for the current iteration:
          VT.AM.Result1     <- data.frame(rbind(c(Iteration=i, ll=VT.AM.ll,     f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val)))
          VT.NoAM.Result1   <- data.frame(rbind(c(Iteration=i, ll=VT.NoAM.ll,   f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val)))
          NoVT.AM.Result1   <- data.frame(rbind(c(Iteration=i, ll=NoVT.AM.ll,   f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val)))
          NoVT.NoAM.Result1 <- data.frame(rbind(c(Iteration=i, ll=NoVT.NoAM.ll, f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val)))
          
          # Combine these results with those from previous iterations:
          VT.AM.Result    = rbind(VT.AM.Result,    VT.AM.Result1)
          VT.NoAM.Result   = rbind(VT.NoAM.Result,   VT.NoAM.Result1)
          NoVT.AM.Result  = rbind(NoVT.AM.Result,  NoVT.AM.Result1)
          NoVT.NoAM.Result = rbind(NoVT.NoAM.Result, NoVT.NoAM.Result1)
          
          i = i + 1 # Change the iteration number
        }
      }
    }
    
    ###################################################################################################################################
    # STEP 4: Choose the best iterations for each model, and compare them to determine which assumptions are most likely true/ untrue #
    ###################################################################################################################################
    
    # Choose the iteration that best fit the data (i.e., with the lowest log likelihood):
    (Best.VT.AM.Iteration     = VT.AM.Result[which(VT.AM.Result$ll         == min(VT.AM.Result$ll,     na.rm=T)), 'Iteration'])
    (Best.VT.NoAM.Iteration   = VT.NoAM.Result[which(VT.NoAM.Result$ll     == min(VT.NoAM.Result$ll,   na.rm=T)), 'Iteration'])
    (Best.NoVT.AM.Iteration   = NoVT.AM.Result[which(NoVT.AM.Result$ll     == min(NoVT.AM.Result$ll,   na.rm=T)), 'Iteration'])
    (Best.NoVT.NoAM.Iteration = NoVT.NoAM.Result[which(NoVT.NoAM.Result$ll == min(NoVT.NoAM.Result$ll, na.rm=T)), 'Iteration'])
    
    # Collect those iterations:
    Best.VT.AM.Model     <- eval(parse(text=paste0("VT.AM.It",     Best.VT.AM.Iteration,     ".Fit")))
    Best.VT.NoAM.Model   <- eval(parse(text=paste0("VT.NoAM.It",   Best.VT.NoAM.Iteration,   ".Fit")))
    Best.NoVT.AM.Model   <- eval(parse(text=paste0("NoVT.AM.It",   Best.NoVT.AM.Iteration,   ".Fit")))
    Best.NoVT.NoAM.Model <- eval(parse(text=paste0("NoVT.NoAM.It", Best.NoVT.NoAM.Iteration, ".Fit")))
    
    # Compare the models to one another:
    mxCompare(Best.VT.AM.Model, Best.NoVT.AM.Model) # Tells us whether the trait is being influenced by parental effects
    mxCompare(Best.VT.AM.Model, Best.VT.NoAM.Model)  # Tells us whether or not AM is being driven by phenotypic similarity
    
    # Get the results from the best model:
    Best_Model <- Best.VT.AM.Model # Which model performed best?
    
    Final_Result = data.frame(Model = Best_Model$name,
                              VY = Best_Model$VY$values[1,1],
                              VA = Best_Model$VY$values[1,1] - Best_Model$VF$values[1,1] - Best_Model$VE$values[1,1],
                              VF = Best_Model$VF$values[1,1],
                              VE = Best_Model$VE$values[1,1],
                              delta = Best_Model$delta$values[1,1],
                              Omega = Best_Model$Omega$values[1,1],
                              k  = Best_Model$k$values[1,1],
                              w  = Best_Model$w$result[1,1],
                              f  = Best_Model$f$values[1,1],
                              mu = Best_Model$mu$values[1,1],
                              g  = Best_Model$g$values[1,1],
                              Omega2mu = (Best_Model$Omega$values[1,1])^2 * Best_Model$mu$values[1,1],
                              h2 = (Best_Model$VY$values[1,1] - Best_Model$VF$values[1,1] - Best_Model$VE$values[1,1])/Best_Model$VY$values[1,1],
                              ll = Best_Model$output$Minus2LogLikelihood)
    
    # Final Result!
    Final_Result
    
    # If you prefer rounded numbers:
    Final_Result_Rounded <- cbind(Final_Result$Model, round(Final_Result[,2:ncol(Final_Result)],4))
    
    # If you prefer the variance components to be scaled such that they are out of 1:
    Final_Result[,c('VY','VA','VF','VE')] <- Final_Result[,c('VY','VA','VF','VE')] / Final_Result[,'VY']; Final_Result