# Estimating Vertical Transmission and Genetic Nurture using Polygenic Scores # # Jared V. Balbona, Yongkang Kim, and Matthew C. Keller # # If you have any questions, please feel free to email Jared and/ or Matthew at jared.v.balbona@gmail.com and matthew.c.keller@gmail.com # And for further information, please also check out our original paper (doi.org/10.1007/s10519-020-10032-w) and the OpenMx website (https://openmx.ssri.psu.edu/). # Thank you for following our demonstration! # QUALTRICS LINK: https://qimr.az1.qualtrics.com/jfe/form/SV_cO294hjwW6Ik8Cy # ############################################################################################ ## KEY ## ############################################################################################ # Our dataset contains: # Yo: The offspring's phenotypic value # Yp: The father's phenotypic value # Ym: The mother's phenotypic value # Tp: A PGS made from the TRANSMITTED portion of the PATERNAL genotype # Tm: A PGS made from the TRANSMITTED portion of the MATERNAL genotype # NTp: A PGS made from the NON-TRANSMITTED portion of the PATERNAL genotype # NTm: A PGS made from the NON-TRANSMITTED portion of the MATERNAL genotype # Our model estimates: # VY: Phenotypic variance # VA: Variance due to additive # VF: Variance due to parental effects/ vertical transmissino # VE: Variance due to the offspring's unique environment # w: Genetic Nurture (i.e., the passive G-E covariance due to vertical transmission) # f: The direct impact of the parental phenotype on the offspring phenotype (i.e., vertical transmission) # g: The increase in haplotypic (co)variances due to assortative mating # mu: The assortative mating co-path coefficient, equal to (Spousal Phen Cov) / (Spousal Phen Variance)^2 # k: The haplotypic PGS variance under no AM (a constant that is not estimated) # delta: The direct effect of the PGS on the phenotype # Omega: The covariance between a parent's phenotype and either of their haplotypic PGS's (e.g., cov(Ym, NTm)) # h2: The proportion of phenotypic variance due to additive genetic factors (i.e., narrow-sense h2) # ll: The log-likelihood for our model. It essentially measures how much unexplained variation there is in our model, with higher values indicating less accuracy. ############################################################################################ # STEP O: Load data, specify options, and look at the OBSERVED variance/ covariance matrix # ############################################################################################ library(OpenMx) library(data.table) library(stringr) rm(list=ls()) # Clear any output from previous scripts setwd("path/to/your/directory") # Specify Options: mxOption(NULL,"Calculate Hessian","Yes") mxOption(NULL,"Standard Errors","Yes") mxOption(NULL,"Default optimizer","NPSOL") # Load the simulated data for this demonstration: Example_Data = fread("Example_Data.txt", header = T) str(Example_Data) # Make sure the column names are correctly formatted (including capitalization) colnames(Example_Data) == c("Yo", "Yp", "Ym", "Tp", "NTp", "Tm", "NTm") # Look at the full observed variance/ covariance matrix: round(cov(Example_Data),3) ############################################################################################ # STEP 1: Create Variables and their Constraints ############################################################################################ # Create Variables: # Variance Components: VY = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="VY1", name="VY") # Phenotypic variance VF = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="VF1", name="VF") # Variance due to VT VE = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="VE1", name="VE") # Residual variance # Genetic effects: delta = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="delta1",name="delta") # Effect of PGS on phen k = mxMatrix(type="Full", nrow=1, ncol=1, free=F, values=.5, label="k1", name="k") # PGS variance (if no AM) Omega = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="Omega1",name="Omega") # Within-person PGS-Phen covariance # Assortative mating effects: mu = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="mu1", name="mu") # AM co-path coefficient g = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="g1", name="g") # Increase in PGS (co)variances from AM # Vertical transmission effects (note that VF above is also due to VT): f = mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=.2, label="f1", name="f") # Vertical Transmission effect # Provide the algebraic expectations for our terms (derived via path tracing): # Within-person covariances: w = mxAlgebra(2 * f * Omega * (1 + VY*mu), name="w") # Genetic Nurture Omega_Algebra <- mxAlgebra(2 * delta * g + delta * k + .5 * w, name="Omega_Algebra") # cov(Yp, NTp) # Between-spouse genetic covariance: g_Algebra = mxAlgebra(Omega * mu * Omega, name="g_Algebra") # Note: This definition of g assumes equilibrium # Algebraic variance components: VY_Algebra = mxAlgebra(2 * Omega * delta + delta * w + VF + VE, name="VY_Algebra") VF_Algebra = mxAlgebra(2 * f^2 * VY * (1 + VY * mu), name="VF_Algebra") # Covariances between the offspring phenotype and parental PGS's: Yo_NTp = mxAlgebra(2 * delta * g + .5 * w, name="Yo_NTp") Yo_NTm = mxAlgebra(2 * delta * g + .5 * w, name="Yo_NTm") Yo_Tp = mxAlgebra(Yo_NTp + delta * k, name="Yo_Tp") Yo_Tm = mxAlgebra(Yo_NTm + delta * k, name="Yo_Tm") # Covariances between the offspring phenotype and parental phenotypes: Yo_Ym = mxAlgebra(delta * Omega + delta * Omega * mu * VY + f * VY + f * VY * mu * VY, name="Yo_Ym") Yo_Yp = mxAlgebra(delta * Omega + delta * Omega * mu * VY + f * VY + f * VY * mu * VY, name="Yo_Yp") # Between-spouse covariances: Yp_PGSm = mxAlgebra(VY * mu * Omega, name="Yp_PGSm") Ym_PGSp = mxAlgebra(VY * mu * Omega, name="Ym_PGSp") Ym_Yp = mxAlgebra(VY * mu * VY, name="Ym_Yp") # Use constraints to relate the variables to the algebra, and to test assumptions: # Set the variables created above to be equal to their algebraic constraints: VY_Constraint = mxConstraint(VY == VY_Algebra, name='VY_Constraint') VF_Constraint = mxConstraint(VF == VF_Algebra, name='VF_Constraint') g_Constraint = mxConstraint(g == g_Algebra, name='g_Constraint') Omega_Constraint = mxConstraint(Omega == Omega_Algebra, name='Omega_Constraint') # Constraints for testing different model assumptions: No_VT <- mxConstraint(f==0, name="No_VT") # Inclusion of this constraint will result in a model that assumes no VT No_AM <- mxConstraint(g==0, name="No_AM") # Inclusion of this constraint will result in a model that assumes no AM ############################################################################################ # STEP 2: Relate our algebraic expectations to the observed data and create the model ############################################################################################ # Expected covariances between each variable: CovMatrix = mxAlgebra(rbind( # Yo Yp Ym Tp NTp Tm NTm cbind( VY ,Yo_Yp ,Yo_Ym ,Yo_Tp ,Yo_NTp ,Yo_Tm ,Yo_NTm ), #Yo cbind( Yo_Yp ,VY ,Ym_Yp ,Omega ,Omega ,Yp_PGSm ,Yp_PGSm ), #Yp cbind( Yo_Ym ,Ym_Yp ,VY ,Ym_PGSp ,Ym_PGSp ,Omega ,Omega ), #Ym cbind( Yo_Tp ,Omega ,Ym_PGSp ,k+g ,g ,g ,g ), #Tp cbind( Yo_NTp ,Omega ,Ym_PGSp ,g ,k+g ,g ,g ), #NTp cbind( Yo_Tm ,Yp_PGSm ,Omega ,g ,g ,k+g ,g ), #Tm cbind( Yo_NTm ,Yp_PGSm ,Omega ,g ,g ,g ,k+g )), #NTm dimnames=list(colnames(Example_Data),colnames(Example_Data)),name="expCov") # Expected means for each variable: Means = mxMatrix(type="Full", nrow=1, ncol=7, free=TRUE, values= .1, label=c("meanYo","meanYp","meanYm","meanTp","meanNTp","meanTm","meanNTm"), dimnames=list(NULL, c("Yo","Yp","Ym","Tp","NTp","Tm","NTm")), name="expMean") # Connect the variable means and covariances with one another: ModelExpectations = mxExpectationNormal(covariance="expCov",means="expMean", dimnames=c("Yo","Yp","Ym","Tp","NTp","Tm", "NTm")) # Convert data into a usable format for OpenMx: Example_Data_Mx = mxData(observed=Example_Data, type="raw" ) # Create fit function: FitFunctionML = mxFitFunctionML() ############################################################################################ # STEP 3: Run the model iteratively ############################################################################################ Start.Val.List <- VT.AM.Result <- VT.NoAM.Result <- NoVT.AM.Result <- NoVT.NoAM.Result <- data.frame() i = 1 # Used for indexing below for(f.Start.Val in c(0, .4)){ for(VE.Start.Val in c(.2, .5, .8)){ for(delta.Start.Val in c(0, .5)){ # Apply the starting values for this iteration: Start.Val.List = rbind(Start.Val.List, data.frame(f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val)) VE <- mxMatrix(type="Full", nrow=1 ,ncol=1, free=TRUE, values=VE.Start.Val, label="VE1", name="VE") delta <- mxMatrix(type="Full", nrow=1 ,ncol=1, free=TRUE, values=delta.Start.Val, label="delta1",name="delta") f <- mxMatrix(type="Full", nrow=1 ,ncol=1, free=TRUE, values=f.Start.Val, label="f1", name="f") # Specify what parameters we're going to be including in our model: Params <- list( VY, VY_Algebra, VY_Constraint, VF, VF_Algebra, VF_Constraint, VE, # Variance Components Omega, Omega_Algebra, Omega_Constraint, delta, k, # Genetic Effects mu, g, g_Algebra, g_Constraint, # AM Effects f, w, # VT Effects Yo_Yp, Yo_Ym, Yo_Tp, Yo_Tm, Yo_NTp, Yo_NTm, Yp_PGSm, Ym_PGSp, Ym_Yp, # Relative Covariances FitFunctionML, Means, ModelExpectations, CovMatrix) # Model Parameters # Construct each model based on their different assumptions; to keep track, name them based on the iteration number assign(paste0("VT.AM.It",i), mxModel("VT_AM", Params, Example_Data_Mx )) # VT and g both freely estimable assign(paste0("VT.NoAM.It",i), mxModel("VT_NoAM", Params, Example_Data_Mx, No_AM )) # Assumes g = Omega^2 mu assign(paste0("NoVT.AM.It",i), mxModel("NoVT_AM", Params, Example_Data_Mx, No_VT )) # Assumes No VT assign(paste0("NoVT.NoAM.It",i),mxModel("NoVT_NoAM",Params, Example_Data_Mx, No_VT, No_AM )) # Assumes No VT and g = Omega^2 mu # Fit each of those different models to our data-- Again, we'll give the output a name that includes the iteration number. assign(paste0("VT.AM.It",i,".Fit"), try(mxRun(eval(parse(text=paste0("VT.AM.It",i))), intervals=F, silent=T), silent=T)) assign(paste0("VT.NoAM.It",i,".Fit"), try(mxRun(eval(parse(text=paste0("VT.NoAM.It",i))), intervals=F, silent=T), silent=T)) assign(paste0("NoVT.AM.It",i,".Fit"), try(mxRun(eval(parse(text=paste0("NoVT.AM.It",i))), intervals=F, silent=T), silent=T)) assign(paste0("NoVT.NoAM.It",i,".Fit"), try(mxRun(eval(parse(text=paste0("NoVT.NoAM.It",i))), intervals=F, silent=T), silent=T)) # If the model ran without errors in this iteration, then record its estimated log likelihood; if not, then assign NA VT.AM.ll <- ifelse(class(eval(parse(text=paste0("VT.AM.It", i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("VT.AM.It", i, ".Fit"))))$Minus2LogLikelihood, NA) VT.NoAM.ll <- ifelse(class(eval(parse(text=paste0("VT.NoAM.It", i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("VT.NoAM.It", i, ".Fit"))))$Minus2LogLikelihood, NA) NoVT.AM.ll <- ifelse(class(eval(parse(text=paste0("NoVT.AM.It", i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("NoVT.AM.It", i, ".Fit"))))$Minus2LogLikelihood, NA) NoVT.NoAM.ll <- ifelse(class(eval(parse(text=paste0("NoVT.NoAM.It",i,".Fit")))) != "try-error", summary(eval(parse(text=paste0("NoVT.NoAM.It", i, ".Fit"))))$Minus2LogLikelihood, NA) # Record the iteration number, log likelihood, and start values for the current iteration: VT.AM.Result1 <- data.frame(rbind(c(Iteration=i, ll=VT.AM.ll, f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val))) VT.NoAM.Result1 <- data.frame(rbind(c(Iteration=i, ll=VT.NoAM.ll, f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val))) NoVT.AM.Result1 <- data.frame(rbind(c(Iteration=i, ll=NoVT.AM.ll, f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val))) NoVT.NoAM.Result1 <- data.frame(rbind(c(Iteration=i, ll=NoVT.NoAM.ll, f.Start.Val=f.Start.Val, VE.Start.Val=VE.Start.Val, delta.Start.Val=delta.Start.Val))) # Combine these results with those from previous iterations: VT.AM.Result = rbind(VT.AM.Result, VT.AM.Result1) VT.NoAM.Result = rbind(VT.NoAM.Result, VT.NoAM.Result1) NoVT.AM.Result = rbind(NoVT.AM.Result, NoVT.AM.Result1) NoVT.NoAM.Result = rbind(NoVT.NoAM.Result, NoVT.NoAM.Result1) i = i + 1 # Change the iteration number } } } ################################################################################################################################### # STEP 4: Choose the best iterations for each model, and compare them to determine which assumptions are most likely true/ untrue # ################################################################################################################################### # Choose the iteration that best fit the data (i.e., with the lowest log likelihood): (Best.VT.AM.Iteration = VT.AM.Result[which(VT.AM.Result$ll == min(VT.AM.Result$ll, na.rm=T)), 'Iteration']) (Best.VT.NoAM.Iteration = VT.NoAM.Result[which(VT.NoAM.Result$ll == min(VT.NoAM.Result$ll, na.rm=T)), 'Iteration']) (Best.NoVT.AM.Iteration = NoVT.AM.Result[which(NoVT.AM.Result$ll == min(NoVT.AM.Result$ll, na.rm=T)), 'Iteration']) (Best.NoVT.NoAM.Iteration = NoVT.NoAM.Result[which(NoVT.NoAM.Result$ll == min(NoVT.NoAM.Result$ll, na.rm=T)), 'Iteration']) # Collect those iterations: Best.VT.AM.Model <- eval(parse(text=paste0("VT.AM.It", Best.VT.AM.Iteration, ".Fit"))) Best.VT.NoAM.Model <- eval(parse(text=paste0("VT.NoAM.It", Best.VT.NoAM.Iteration, ".Fit"))) Best.NoVT.AM.Model <- eval(parse(text=paste0("NoVT.AM.It", Best.NoVT.AM.Iteration, ".Fit"))) Best.NoVT.NoAM.Model <- eval(parse(text=paste0("NoVT.NoAM.It", Best.NoVT.NoAM.Iteration, ".Fit"))) # Compare the models to one another: mxCompare(Best.VT.AM.Model, Best.NoVT.AM.Model) # Tells us whether the trait is being influenced by parental effects mxCompare(Best.VT.AM.Model, Best.VT.NoAM.Model) # Tells us whether or not AM is being driven by phenotypic similarity # Get the results from the best model: Best_Model <- Best.VT.AM.Model # Which model performed best? Final_Result = data.frame(Model = Best_Model$name, VY = Best_Model$VY$values[1,1], VA = Best_Model$VY$values[1,1] - Best_Model$VF$values[1,1] - Best_Model$VE$values[1,1], VF = Best_Model$VF$values[1,1], VE = Best_Model$VE$values[1,1], delta = Best_Model$delta$values[1,1], Omega = Best_Model$Omega$values[1,1], k = Best_Model$k$values[1,1], w = Best_Model$w$result[1,1], f = Best_Model$f$values[1,1], mu = Best_Model$mu$values[1,1], g = Best_Model$g$values[1,1], Omega2mu = (Best_Model$Omega$values[1,1])^2 * Best_Model$mu$values[1,1], h2 = (Best_Model$VY$values[1,1] - Best_Model$VF$values[1,1] - Best_Model$VE$values[1,1])/Best_Model$VY$values[1,1], ll = Best_Model$output$Minus2LogLikelihood) # Final Result! Final_Result # If you prefer rounded numbers: Final_Result_Rounded <- cbind(Final_Result$Model, round(Final_Result[,2:ncol(Final_Result)],4)) # If you prefer the variance components to be scaled such that they are out of 1: Final_Result[,c('VY','VA','VF','VE')] <- Final_Result[,c('VY','VA','VF','VE')] / Final_Result[,'VY']; Final_Result