# ---------------------------------------------------------------------------------------------------------------------- ##### Day 2 Practical: Introduction to Sex limitation # ---------------------------------------------------------------------------------------------------------------------- # This file contains scripts used to answer practical questions. # Below is a quick guide into each of the sections of the code. # You can scroll down to specific lines of code or search for the string in # square brackets to jump to the first line of a section. # We recommend writing down or copy summary tables of goodness-of-fit statistics # and estimates into separate files as you go along. # lines 29-211 [@4ADE]: 4-group ADE script used to model data from same-sex female and male twins simultaneously # lines 244-361 [@5SAT]: 5-group saturated script used to model data from same-sex and opposite-sex twins simultaneously # ---------------------------------------------------------------------------------------------------------------------- # Go To Qualtrics Practical # https://qimr.az1.qualtrics.com/jfe/form/SV_eXmQtPdjWv4yO9g # Remember to set your working directory to YOUR folder in the workshop cloud # setwd("~/elizabeth") ##### # ---------------------------------------------------------------------------------------------------------------------- # Modified Program: oneADE4vca_p1.R # Twin Univariate ADE model to estimate causes of variation across multiple groups # Matrix style model - Raw data - Continuous data # -------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------| rm(list=ls()) # Load Data library(OpenMx) library(psych) mxOption( NULL, "Default optimizer", "NPSOL" ) source("miFunctions.R") # Note: If this doesn't open for you, please make sure that your working directory is set correctly twinData1 <- read.table(file = "TwinData2024.csv", header = TRUE, sep = ",") twinData <- twinData1 dim(twinData) describe(twinData[,11:12], skew=F) # Select Variables for Analysis vars <- 'bmi' # list of variables names nv <- 1 # number of variables ntv <- nv*2 # number of total variables selVars <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="") # Select Covariates for Analysis twinData[,'age'] <- twinData[,'age']/100 twinData <- twinData[-which(is.na(twinData$age)),] covVars <- 'age' # Select Data for Analysis mzfData <- subset(twinData, zyg==1, c(selVars, covVars)) dzfData <- subset(twinData, zyg==3, c(selVars, covVars)) mzmData <- subset(twinData, zyg==2, c(selVars, covVars)) dzmData <- subset(twinData, zyg==4, c(selVars, covVars)) # Set Starting Values svBe <- 0.01 # start value for regressions svMe <- 20 # start value for means svPa <- 0.1 # start value for path coefficient svPe <- 0.4 # start value for path coefficient for e # ---------------------------------------------------------------------------------------------------------------------- # PREPARE MODEL # Create Matrices for Covariates and linear Regression Coefficients defL <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.age"), name="defL" ) pathBf <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=svBe, label="bf11", name="bf" ) pathBm <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=svBe, label="bm11", name="bm" ) # Create Algebra for expected Mean Matrices meanGf <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mZf","mZf"), name="meanGf" ) meanGm <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mZm","mZm"), name="meanGm" ) expMeanZf <- mxAlgebra( expression= meanGf + cbind(bf%*%defL,bf%*%defL), name="expMeanZf" ) expMeanZm <- mxAlgebra( expression= meanGm + cbind(bm%*%defL,bm%*%defL), name="expMeanZm" ) # Create Matrices for Path Coefficients covAf <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=svPa, label="VAf11", name="VAf" ) covDf <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=svPa, label="VDf11", name="VDf" ) covEf <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=svPe, label="VEf11", name="VEf" ) covAm <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=svPa, label="VAm11", name="VAm" ) covDm <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=svPa, label="VDm11", name="VDm" ) covEm <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=svPe, label="VEm11", name="VEm" ) # Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins covPf <- mxAlgebra( expression= VAf+VDf+VEf, name="Vf" ) covPm <- mxAlgebra( expression= VAm+VDm+VEm, name="Vm" ) covMZf <- mxAlgebra( expression= VAf+VDf, name="cMZf" ) covDZf <- mxAlgebra( expression= 0.5%x%VAf+ 0.25%x%VDf, name="cDZf" ) covMZm <- mxAlgebra( expression= VAm+VDm, name="cMZm" ) covDZm <- mxAlgebra( expression= 0.5%x%VAm+ 0.25%x%VDm, name="cDZm" ) expCovMZf <- mxAlgebra( expression= rbind( cbind(Vf, cMZf), cbind(t(cMZf), Vf)), name="expCovMZf" ) expCovDZf <- mxAlgebra( expression= rbind( cbind(Vf, cDZf), cbind(t(cDZf), Vf)), name="expCovDZf" ) expCovMZm <- mxAlgebra( expression= rbind( cbind(Vm, cMZm), cbind(t(cMZm), Vm)), name="expCovMZm" ) expCovDZm <- mxAlgebra( expression= rbind( cbind(Vm, cDZm), cbind(t(cDZm), Vm)), name="expCovDZm" ) # Create Data Objects for Multiple Groups dataMZf <- mxData( observed=mzfData, type="raw" ) dataDZf <- mxData( observed=dzfData, type="raw" ) dataMZm <- mxData( observed=mzmData, type="raw" ) dataDZm <- mxData( observed=dzmData, type="raw" ) # Create Expectation Objects for Multiple Groups expMZf <- mxExpectationNormal( covariance="expCovMZf", means="expMeanZf", dimnames=selVars ) expDZf <- mxExpectationNormal( covariance="expCovDZf", means="expMeanZf", dimnames=selVars ) expMZm <- mxExpectationNormal( covariance="expCovMZm", means="expMeanZm", dimnames=selVars ) expDZm <- mxExpectationNormal( covariance="expCovDZm", means="expMeanZm", dimnames=selVars ) funML <- mxFitFunctionML() # Create Model Objects for Multiple Groups parsZf <- list( pathBf, covAf, covDf, covEf, covPf ) parsZm <- list( pathBm, covAm, covDm, covEm, covPm ) defs <- list( defL ) modelMZf <- mxModel( name="MZf", parsZf, defs, meanGf, expMeanZf, covMZf, expCovMZf, dataMZf, expMZf, funML ) modelDZf <- mxModel( name="DZf", parsZf, defs, meanGf, expMeanZf, covDZf, expCovDZf, dataDZf, expDZf, funML ) modelMZm <- mxModel( name="MZm", parsZm, defs, meanGm, expMeanZm, covMZm, expCovMZm, dataMZm, expMZm, funML ) modelDZm <- mxModel( name="DZm", parsZm, defs, meanGm, expMeanZm, covDZm, expCovDZm, dataDZm, expDZm, funML ) multi <- mxFitFunctionMultigroup( c("MZf","DZf","MZm","DZm") ) # Create Algebra for Variance Components rowUS <- rep('US',nv) colUS <- rep(c('VAf','VDf','VEf','SAf','SDf','SEf','VAm','VDm','VEm','SAm','SDm','SEm'),each=nv) estUS <- mxAlgebra( expression=cbind(VAf,VDf,VEf,VAf/Vf,VDf/Vf,VEf/Vf,VAm,VDm,VEm,VAm/Vm,VDm/Vm,VEm/Vm), name="US", dimnames=list(rowUS,colUS)) # Create Confidence Interval Objects ciADE <- mxCI( c("US[1,1:3]","US[1,7:9]") ) #ciADE <- mxCI( "US[1,1:12]" ) # Build Model with Confidence Intervals modelADEq <- mxModel( "oneADEq4vca", parsZf, parsZm, modelMZf, modelDZf, modelMZm, modelDZm, multi, estUS, ciADE ) # ---------------------------------------------------------------------------------------------------------------------- # RUN MODEL # Run ADEq Model - Quantitative Sex Differences ADE model fitADEq <- mxRun( modelADEq, intervals=T ) sumADEq <- summary( fitADEq ) # QUESTION: What is fitADEq in simple terms? What does it contain? # ANSWER: FitADEq is an object that contains all the estimated parameters, data, # defined matrices, and of the ADE model called "modelADEq". # If you have time, take a look at the object fitADEq and explore. # Print Goodness-of-fit Statistics & Parameter Estimates fitGofs(fitADEq) fitEsts(fitADEq) fitEstCis(fitADEq) # This code summarizes the unstandardized (VAf, VAm, etc..) variance components # estimates produced from fitADEq. This output also produces standardized # variance component estimates. round(fitADEq$US$result,4) # STRETCH QUESTIONS/TASKS # QUESTION: What are the estimates and 95% CI of ADE in males and females? # lbound estimate ubound # oneADEq4vca.US[1,1] 0.0681 0.3674 0.6530 # oneADEq4vca.US[1,2] -0.0459 0.2250 0.5244 # oneADEq4vca.US[1,3] 0.1503 0.1690 0.1910 # oneADEq4vca.US[1,7] -0.2054 0.1032 0.3965 # oneADEq4vca.US[1,8] 0.0929 0.3758 0.6960 # oneADEq4vca.US[1,9] 0.1157 0.1375 0.1650 # STRETCH QUESTION: What is meant by a standardized variance component? # # ---------------------------------------------------------------------------------------------------------------------- # RUN SUBMODELS # Run ADE model - Test for Quantitative Sex Differences of ADE model modelADE <- mxModel( fitADEq, name="oneADE4vca" ) modelADE <- omxSetParameters( modelADE, labels=c("VAf11","VAm11"), free=TRUE, values=svPa, newlabels='VA11' ) modelADE <- omxSetParameters( modelADE, labels=c("VDf11","VDm11"), free=TRUE, values=svPa, newlabels='VD11' ) modelADE <- omxSetParameters( modelADE, labels=c("VEf11","VEm11"), free=TRUE, values=svPa, newlabels='VE11' ) fitADE <- mxRun( modelADE, intervals=T ) fitGofs(fitADE); fitEsts(fitADE) # # Above, we are Fitting a submodel of fitADEq that takes all the details from fit ADEq and then # equates the variance component estimates from A, D, and E between males and females # by making their labels have the same name. mxCompare( fitADEq, fitADE ) round(rbind(fitADEq$US$result,fitADE$US$result),4) # Run submodels with sex differences based on previous result to evaluate significance of parameters # ---------------------------------------------------------- # Test Significance of Sources of Variance of ADEq model with Sex differences # Run AEq model modelAEq <- mxModel( fitADEq, name="oneAEq4vca" ) modelAEq <- omxSetParameters( modelAEq, labels=c("VDf11","VDm11"), free=FALSE, values=0 ) fitAEq <- mxRun( modelAEq, intervals=T ) mxCompare( fitADEq, fitAEq ) fitGofs(fitAEq); fitEsts(fitAEq) # Run Eq model modelEq <- mxModel( fitAEq, name="oneEq4vca" ) modelEq <- omxSetParameters( modelEq, labels=c("VAf11","VAm11"), free=FALSE, values=0 ) fitEq <- mxRun( modelEq, intervals=T ) mxCompare( fitAEq, fitEq ) fitGofs(fitEq); fitEsts(fitEq) # Print Comparative Fit Statistics mxCompare( fitADEq, nested <- list(fitAEq, fitEq) ) round(rbind(fitADEq$US$result,fitAEq$US$result,fitEq$US$result),4) # STRETCH QUESTIONS # QUESTION: Heritability refers to the proportion of phenotypic variation # for a trait that can be attributed to genetic variation. # Narrow-sense heritability is solely an estimate of additive genetic effects—the # summed effects of multiple genes contributing to a single phenotype. # Broad-sense heritability is an estimate of both additive and non-additive # genetic effects, and thus encompasses the non-additive genetic effects, # and thus encompasses the additive, dominance and epistatic genetic effects # What is your estimate of narrow sense heritability in males and females # based on your fitADEq results? # What is your estimate of broad sense heritability in males and females? # ---------------------------------------------------------------------------------------------------------------------- ##### Stretch Tasks # ---------------------------------------------------------------------------------------------------------------------- # Modified Program: oneSAT5ca.R. Saturated model using 5 zygosity groups # # Twin Univariate Saturated model to estimate means and (co)variances across multiple groups # ---------------------------------------------------------------------------------------------------------------------- # PREPARE DATA # Load Data rm(list=ls()) source("miFunctions.R") data(twinData) dim(twinData) describe(twinData[,11:12], skew=F) # Select Variables for Analysis vars <- 'bmi' # list of variables names nv <- 1 # number of variables ntv <- nv*2 # number of total variables selVars <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="") # Select Covariates for Analysis twinData[,'age'] <- twinData[,'age']/100 twinData <- twinData[-which(is.na(twinData$age)),] covVars <- 'age' # Select Data for Analysis mzfData <- subset(twinData, zyg==1, c(selVars, covVars)) dzfData <- subset(twinData, zyg==3, c(selVars, covVars)) mzmData <- subset(twinData, zyg==2, c(selVars, covVars)) dzmData <- subset(twinData, zyg==4, c(selVars, covVars)) dzoData <- subset(twinData, zyg==5, c(selVars, covVars)) #ODZ pairs ordered as T1=female, T2=male # Generate Descriptive Statistics colMeans(mzfData,na.rm=TRUE) colMeans(dzfData,na.rm=TRUE) colMeans(mzmData,na.rm=TRUE) colMeans(dzmData,na.rm=TRUE) colMeans(dzoData,na.rm=TRUE) cov(mzfData,use="complete") cov(dzfData,use="complete") cov(mzmData,use="complete") cov(dzmData,use="complete") cov(dzoData,use="complete") # Set Starting Values svBe <- 0.01 # start value for regressions svMe <- 20 # start value for means svVa <- 0.8 # start value for variance lbVa <- 0.0001 # start value for lower bounds # ---------------------------------------------------------------------------------------------------------------------- # PREPARE MODEL # Create Matrices for Covariates and linear Regression Coefficients defL <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.age"), name="defL" ) pathBf <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=.01, label="bf11", name="bf" ) pathBm <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=.01, label="bm11", name="bm" ) # Create Algebra for expected Mean Matrices meanMZf <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mMZf1","mMZf2"), name="meanMZf" ) meanDZf <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mDZf1","mDZf2"), name="meanDZf" ) meanMZm <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mMZm1","mMZm2"), name="meanMZm" ) meanDZm <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mDZm1","mDZm2"), name="meanDZm" ) meanDZo <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=c("mDZo1","mDZo2"), name="meanDZo" ) expMeanMZf<- mxAlgebra( expression= meanMZf + cbind(bf%*%defL,bf%*%defL), name="expMeanMZf" ) expMeanDZf<- mxAlgebra( expression= meanDZf + cbind(bf%*%defL,bf%*%defL), name="expMeanDZf" ) expMeanMZm<- mxAlgebra( expression= meanMZm + cbind(bm%*%defL,bm%*%defL), name="expMeanMZm" ) expMeanDZm<- mxAlgebra( expression= meanDZm + cbind(bm%*%defL,bm%*%defL), name="expMeanDZm" ) expMeanDZo<- mxAlgebra( expression= meanDZo + cbind(bf%*%defL,bm%*%defL), name="expMeanDZo" ) # Create Algebra for expected Variance/Covariance Matrices covMZf <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=TRUE, values=valDiag(svVa,ntv), lbound=valDiag(lbVa,ntv), labels=c("vMZf1","cMZf21","vMZf2"), name="covMZf" ) covDZf <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=TRUE, values=valDiag(svVa,ntv), lbound=valDiag(lbVa,ntv), labels=c("vDZf1","cDZf21","vDZf2"), name="covDZf" ) covMZm <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=TRUE, values=valDiag(svVa,ntv), lbound=valDiag(lbVa,ntv), labels=c("vMZm1","cMZm21","vMZm2"), name="covMZm" ) covDZm <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=TRUE, values=valDiag(svVa,ntv), lbound=valDiag(lbVa,ntv), labels=c("vDZm1","cDZm21","vDZm2"), name="covDZm" ) covDZo <- mxMatrix( type="Symm", nrow=ntv, ncol=ntv, free=TRUE, values=valDiag(svVa,ntv), lbound=valDiag(lbVa,ntv), labels=c("vDZo1","cDZo21","vDZo2"), name="covDZo" ) # Create Algebra for Maximum Likelihood Estimates of Twin Correlations matI <- mxMatrix( type="Iden", nrow=ntv, ncol=ntv, name="I" ) corMZf <- mxAlgebra( solve(sqrt(I*covMZf)) %&% covMZf, name="corMZf" ) corDZf <- mxAlgebra( solve(sqrt(I*covDZf)) %&% covDZf, name="corDZf" ) corMZm <- mxAlgebra( solve(sqrt(I*covMZm)) %&% covMZm, name="corMZm" ) corDZm <- mxAlgebra( solve(sqrt(I*covDZm)) %&% covDZm, name="corDZm" ) corDZo <- mxAlgebra( solve(sqrt(I*covDZo)) %&% covDZo, name="corDZo" ) # Create Data Objects for Multiple Groups dataMZf <- mxData( observed=mzfData, type="raw" ) dataDZf <- mxData( observed=dzfData, type="raw" ) dataMZm <- mxData( observed=mzmData, type="raw" ) dataDZm <- mxData( observed=dzmData, type="raw" ) dataDZo <- mxData( observed=dzoData, type="raw" ) # Create Expectation Objects for Multiple Groups expMZf <- mxExpectationNormal( covariance="covMZf", means="expMeanMZf", dimnames=selVars ) expDZf <- mxExpectationNormal( covariance="covDZf", means="expMeanDZf", dimnames=selVars ) expMZm <- mxExpectationNormal( covariance="covMZm", means="expMeanMZm", dimnames=selVars ) expDZm <- mxExpectationNormal( covariance="covDZm", means="expMeanDZm", dimnames=selVars ) expDZo <- mxExpectationNormal( covariance="covDZo", means="expMeanDZo", dimnames=selVars ) funML <- mxFitFunctionML() # Create Model Objects for Multiple Groups pars <- list( pathBf, pathBm ) defs <- list( defL ) modelMZf <- mxModel( pars, defs, meanMZf, expMeanMZf, covMZf, matI, corMZf, dataMZf, expMZf, funML, name="MZf" ) modelDZf <- mxModel( pars, defs, meanDZf, expMeanDZf, covDZf, matI, corDZf, dataDZf, expDZf, funML, name="DZf" ) modelMZm <- mxModel( pars, defs, meanMZm, expMeanMZm, covMZm, matI, corMZm, dataMZm, expMZm, funML, name="MZm" ) modelDZm <- mxModel( pars, defs, meanDZm, expMeanDZm, covDZm, matI, corDZm, dataDZm, expDZm, funML, name="DZm" ) modelDZo <- mxModel( pars, defs, meanDZo, expMeanDZo, covDZo, matI, corDZo, dataDZo, expDZo, funML, name="DZo" ) multi <- mxFitFunctionMultigroup( c("MZf","DZf","MZm","DZm","DZo") ) # Create Confidence Interval Objects ciCov <- mxCI( c('MZf.covMZf','DZf.covDZf','MZm.covMZm','DZm.covDZm','DZo.covDZo' )) ciMean <- mxCI( c('MZf.expMeanMZf','DZf.expMeanDZf','MZm.expMeanMZm','DZm.expMeanDZm','DZo.expMeanDZo' )) # Build Saturated Model with Confidence Intervals modelSAT <- mxModel( "oneSAT5ca", pars, modelMZf, modelDZf, modelMZm, modelDZm, modelDZo, multi, ciCov, ciMean ) # ---------------------------------------------------------------------------------------------------------------------- # RUN MODEL # Run Saturated Model fitSAT <- mxRun( modelSAT, intervals=F ) sumSAT <- summary( fitSAT ) # Print Goodness-of-fit Statistics & Parameter Estimates fitGofs(fitSAT) fitEsts(fitSAT) mxGetExpected( fitSAT, c("means") ) mxGetExpected( fitSAT, c("covariance") ) # QUESTION: Twin correlations for each group were estimated. What are they? How did you find them? fitSAT$submodels$MZf$algebras fitSAT$submodels$DZf$algebras fitSAT$submodels$MZm$algebras fitSAT$submodels$DZm$algebras fitSAT$submodels$DZo$algebras # Try this if you want to go to the result directly. # Repeat for other zygosity groups fitSAT$submodels$DZm$algebras$corDZm$result[2,1] # QUESTION: Using the basic Holzinger (also referred to as Falconer) equations, # hand calculate variance components estimates for same sex pairs. # Describe the patterns do you see across all zygosity groups. # rMZF = 0.77652479 # rDZF = 0.31578095 # rMZM = 0.74466604 # rDZM = 0.25200612 # rDZO = 0.21884812 # Same sex pairs correlations are bigger than ODZ. MZs higher than DZs. # ADE model likely # Af- 2*(rMZF-rDZF) = 2*(0.78-0.32) = 0.92 # Cf (or Df)- 2rDZF-rMZF = 2*0.32-0.78 = -0.14 # Ef- 1-rMZF = 1-0.78 = 0.22 # # Am- 2*(rMZM-rDZM) = 2*(0.74-0.25) = 0.98 # Cm (or Dm)- 2rDZM-rMZM = 2*0.25-0.74 = -0.24 # Em- 1-rMZM = 1-0.74 = 0.26 # # ---------------------------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------------------------