# ----------------------------------------------------------------------- # Program: CreateSimData.R # Author: Matt Keller # Date: March 10, 2016 # Adapted by Lucia Colodro Conde, March 8 2018. # ----------------------------------------------------------------------- setwd("/home/lucia/Desktop/Friday_Hackathon") rm(list=ls(all=TRUE)) require(psych) require(OpenMx) source("miFunctions.R") mxOption(NULL,"Default optimizer","NPSOL") # ----------------------------------------------------------------------- # BEGIN HACKATHON! # ----------------------------------------------------------------------- # Starting things up # ----------------------------------------------------------------------- #Get our dataset from the web. #This is a cool R trick: your file might exist on your local machine #OR it might exist on the web! con <- "http://www.matthewckeller.com/public/all.dat2.txt" data <- read.table(con,header=TRUE) colnames(data) nms <- names(data) <- c("twintype","famid","tw1chol","tw2chol","tw1age","tw2age","tw1cholR","tw2cholR") # ----------------------------------------------------------------------- # Prepare Data # ----------------------------------------------------------------------- head(data) str(data) #get summary of each column in data summary(data) #make sure no weird outliers (e.g., -999) #check missingness sum(is.na(data)) # Check zygosity levels and frequencies. table(data$twintype) # Check if age of twin 2 is the same as age of twin 2. cor(data$tw1age,data$tw2age, use ="all") plot(data$tw1age,data$tw2age) # Creat two zygosity group for quick first check. mzData <- data[data$twintype %in% c('mzm','mzf'),3:6] dzData <- data[data$twintype %in% c('dzm','dzf','dzos'),3:6] dim(mzData) dim(dzData) #Always look at your data ! plot(mzData$tw1chol,mzData$tw2chol,main='MZ scatterplot') plot(dzData$tw1chol,dzData$tw2chol,main='DZ scatterplot') #Look at covariances round(cov(mzData[,1:2],use="complete"),3) #MZ Var/Covar 2x2 matrix round(cov(dzData[,1:2],use="complete"),3) #DZ Var/Covar 2x2 matrix #create object of MZ vs DZ cov's (cov.MZ <- cov(mzData,use="complete") [1,2]) (cov.DZ <- cov(dzData,use="complete") [1,2]) # ----------------------------------------------------------------------- # CHECK CTD ASSUMPTIONS AND EFFECT OF POTENTIAL COVARIATES (AGE) # Select Variables for Analysis nv <- 1 # number of variables ntv <- nv*2 # number of total variables selVars <- c('tw1chol' , 'tw2chol') defVars <- c('tw1age') useVars <- c(selVars,defVars) # ------------------------------------------------------------------ # PREPARE MODEL # Saturated Model # Set Starting Values StMean <- 0.5 # start value for means StSd <- 1 # start value for standard deviation StMZ <- .8 # start value for MZ correlation StDZ <- .4 # start value for DZ correlation # Age corrections # Regression effects beta1 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.1, labels='betaAge', name="beta1" ) # Independent variables obsAge <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=FALSE, labels=c("data.tw1age","data.tw1age"), name="Age") inclusions <- list (beta1, obsAge) # FOR EACH ZYGOSITY GROUP # Select Data for Analysis & specify the model # Algebra for expected Mean Matrices # Building the model # Algebra for expected Variance/Covariance Matrices in MZ & DZ twins # Build Objectives for each group fitFunction <- mxFitFunctionML(rowDiagnostics=TRUE) # mzf mzfdata <- subset(data, twintype=="mzf", useVars) head(mzfdata) mzfData <- mxData( observed=mzfdata, type="raw" ) meanfMZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean1","mean1a"), name="intercept_fMZ" ) expMeanfMZ <- mxAlgebra( intercept_fMZ + beta1 %x% Age, name="regressfMZ") sdFMZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd1","sd1a"), name="sd_fMZ") corfMZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StMZ, labels="cor1",name="corfMZ" ) expCovfMZ <- mxAlgebra( sd_fMZ %&% corfMZ , name="expCovfMZ") objfMZ <- mxExpectationNormal( covariance="expCovfMZ", means="regressfMZ", dimnames=selVars ) cifMZ <- mxCI('expCovfMZ') modelfMZ <- mxModel( "fMZ", inclusions, expMeanfMZ, meanfMZ, sdFMZ, corfMZ, expCovfMZ, mzfData, objfMZ,cifMZ,fitFunction ) # mzm mzmdata <- subset(data, twintype=="mzm", useVars) mzmData <- mxData( observed=mzmdata, type="raw" ) meanmMZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean2","mean2a"), name="intercept_mMZ" ) expMeanmMZ <- mxAlgebra(intercept_mMZ + beta1 %x% Age, name="regressmMZ") sdMMZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd2","sd2a"), name="sd_mMZ") cormMZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StMZ, labels="cor2",name="cormMZ" ) expCovmMZ <- mxAlgebra( sd_mMZ %&% cormMZ , name="expCovmMZ") objmMZ <- mxExpectationNormal( covariance="expCovmMZ", means="regressmMZ", dimnames=selVars ) cimMZ <- mxCI('expCovmMZ') modelmMZ <- mxModel( "mMZ", inclusions, expMeanmMZ, meanmMZ, sdMMZ, cormMZ, expCovmMZ, mzmData, objmMZ,cimMZ,fitFunction ) # dzf dzfdata <- subset(data, twintype=="dzf", useVars) dzfData <- mxData( observed=dzfdata, type="raw" ) meanfDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean3","mean3a"), name="intercept_fDZ" ) expMeanfDZ <- mxAlgebra(intercept_fDZ + beta1 %x% Age, name="regressfDZ") sdFDZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd3","sd3a"), name="sd_fDZ") corfDZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StDZ, labels="cor3",name="corfDZ" ) expCovfDZ <- mxAlgebra( sd_fDZ %&% corfDZ , name="expCovfDZ") objfDZ <- mxExpectationNormal( covariance="expCovfDZ", means="regressfDZ", dimnames=selVars ) cifDZ <- mxCI('expCovfDZ' ) modelfDZ <- mxModel( "fDZ", inclusions, expMeanfDZ, meanfDZ, sdFDZ, corfDZ, expCovfDZ, dzfData, objfDZ,cifDZ,fitFunction ) # dzm dzmdata <- subset(data, twintype=="dzm", useVars) dzmData <- mxData( observed=dzmdata, type="raw" ) meanmDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean4","mean4a"), name="intercept_mDZ" ) expMeanmDZ <- mxAlgebra(intercept_mDZ + beta1 %x% Age, name="regressmDZ") sdMDZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd4","sd4a"), name="sd_mDZ") cormDZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StDZ, labels="cor4",name="cormDZ" ) expCovmDZ <- mxAlgebra( sd_mDZ %&% cormDZ , name="expCovmDZ") objmDZ <- mxExpectationNormal( covariance="expCovmDZ", means="regressmDZ", dimnames=selVars ) cimDZ <- mxCI( 'expCovmDZ' ) modelmDZ <- mxModel( "mDZ", inclusions, expMeanmDZ, meanmDZ, sdMDZ, cormDZ, expCovmDZ, dzmData, objmDZ ,cimDZ, fitFunction) # dzmf dzmfdata <- subset(data, twintype=="dzos", useVars) dzmfData <- mxData( observed=dzmfdata, type="raw" ) meanmfDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean6","mean5"), name="intercept_mfDZ" ) expMeanmfDZ <- mxAlgebra(intercept_mfDZ + beta1 %x% Age, name="regressmfDZ") sdMFDZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd6","sd5"), name="sd_mfDZ") cormfDZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StDZ, labels="cor5",name="cormfDZ" ) expCovmfDZ <- mxAlgebra( sd_mfDZ %&% cormfDZ , name="expCovmfDZ") objmfDZ <- mxExpectationNormal( covariance="expCovmfDZ", means="regressmfDZ", dimnames=selVars ) cimfDZ <- mxCI('expCovmfDZ') modelmfDZ <- mxModel( "mfDZ", inclusions, expMeanmfDZ, meanmfDZ, sdMFDZ,cormfDZ, expCovmfDZ, dzmfData, objmfDZ,cimfDZ,fitFunction ) # Sum the likelihoods modelFit <- mxFitFunctionMultigroup( c("fMZ", "mMZ", "fDZ", "mDZ", "mfDZ") ) #Putting it all together baseModel <- mxModel( "base", modelfMZ, modelmMZ, modelfDZ, modelmDZ, modelmfDZ, modelFit) # ------------------------------------------------------------------------------ # RUN MODEL baseFit <- mxRun( baseModel, intervals=F ) baseSumm <- summary( baseFit ) baseSumm ################################################################################################# # SUBMODELS ################################################################################################# # H1m: Presence of birth order effects. # Constrain expected Means to be equal across twin order meanmodel1 <- baseFit meanmodel1 <- omxSetParameters( meanmodel1, label="mean1a", newlabels='mean1' ) meanmodel1 <- omxSetParameters( meanmodel1, label="mean2a", newlabels='mean2' ) meanmodel1 <- omxSetParameters( meanmodel1, label="mean3a", newlabels='mean3' ) meanmodel1 <- omxSetParameters( meanmodel1, label="mean4a", newlabels='mean4' ) meanmodel1 <- omxAssignFirstParameters(meanmodel1) meanmodel1Fit <- mxRun( meanmodel1, intervals=F ) meanmodel1Summ <- summary( meanmodel1Fit ) meanmodel1Summ mxCompare(baseFit, meanmodel1Fit) # H2m: Homogeneity of means between MZ and DZ twins within like-sex twin pairs. # Constrain expected Means to be equal in same-sex pairs meanmodel2 <- meanmodel1Fit meanmodel2 <- omxSetParameters( meanmodel2, label="mean1", newlabels='mean3' ) meanmodel2 <- omxSetParameters( meanmodel2, label="mean2", newlabels='mean4' ) meanmodel2 <- omxAssignFirstParameters(meanmodel2) meanmodel2Fit <- mxRun( meanmodel2, intervals=F ) meanmodel2Summ <- summary( meanmodel2Fit ) meanmodel2Summ mxCompare(meanmodel1Fit, meanmodel2Fit) # H3m: Homogeneity of means between MZ and DZ twins within all female twins and within all male twins. Constrain expected Means to be equal between same sex and opposite sex pairs meanmodel3 <- meanmodel2Fit meanmodel3 <- omxSetParameters( meanmodel3, label="mean5", newlabels='mean3' ) meanmodel3 <- omxSetParameters( meanmodel3, label="mean6", newlabels='mean4' ) meanmodel3 <- omxAssignFirstParameters(meanmodel3) meanmodel3Fit <- mxRun( meanmodel3, intervals=F ) meanmodel3Summ <- summary( meanmodel3Fit ) meanmodel3Summ mxCompare(meanmodel2Fit, meanmodel3Fit) # H4m: Homogeneity of means between male and female means. Constrain expected Means to be equal across sex and zygosity pair meanmodel4 <- meanmodel3Fit meanmodel4 <- omxSetParameters( meanmodel4, label="mean3", newlabels='mean4' ) meanmodel4 <- omxAssignFirstParameters(meanmodel4) meanmodel4Fit <- mxRun( meanmodel4, intervals=F ) meanmodel4Summ <- summary( meanmodel4Fit ) meanmodel4Summ mxCompare(meanmodel3Fit, meanmodel4Fit) # Significance of covariate/s ############################################ ageModel <- meanmodel4Fit ageModel <- omxSetParameters( ageModel, label="betaAge", free=F, value=0) ageModel <- omxAssignFirstParameters(ageModel) ageModelFit <- mxRun(ageModel, intervals=F ) ageModelSumm <- summary( ageModelFit ) ageModelSumm mxCompare(meanmodel4Fit, ageModelFit) ############################################ # H1v: Presence of birth order effects . # Constrain expected var to be equal across twin order varModel1 <- ageModelFit varModel1 <- omxSetParameters( varModel1, label="sd1a", newlabels='sd1' ) varModel1 <- omxSetParameters( varModel1, label="sd2a", newlabels='sd2' ) varModel1 <- omxSetParameters( varModel1, label="sd3a", newlabels='sd3' ) varModel1 <- omxSetParameters( varModel1, label="sd4a", newlabels='sd4' ) varModel1 <- omxAssignFirstParameters(varModel1) varModel1Fit <- mxRun( varModel1, intervals=F ) varModel1Summ <- summary( varModel1Fit ) varModel1Summ mxCompare(ageModelFit, varModel1Fit) # H2v: Homogeneity of variances between MZ and DZ twins within like-sex twin pairs. # Constrain expected var to be equal across zygosity (for same sex) varModel2 <- varModel1Fit varModel2 <- omxSetParameters( varModel2, label="sd3", newlabels='sd1' ) varModel2 <- omxSetParameters( varModel2, label="sd4", newlabels='sd2' ) varModel2 <- omxAssignFirstParameters(varModel2) varModel2Fit <- mxRun( varModel2, intervals=F ) varModel2Summ <- summary( varModel2Fit ) varModel2Summ mxCompare(varModel1Fit, varModel2Fit) # H3v: Homogeneity of variances between MZ and DZ twins within all female twins # and within all male twins. Constrain expected var to be equal btw same sex # and opposite sex pairs varModel3 <- varModel2Fit varModel3 <- omxSetParameters( varModel3, label="sd5", newlabels='sd1' ) varModel3 <- omxSetParameters( varModel3, label="sd6", newlabels='sd2' ) varModel3 <- omxAssignFirstParameters(varModel3) varModel3Fit <- mxRun( varModel3, intervals=F ) varModel3Summ <- summary( varModel3Fit ) varModel3Summ mxCompare(varModel2Fit, varModel3Fit) # H4v: Homogeneity of variances between male and female twins. # Constrain expected var to be equal for all pairs varModel4 <- varModel3Fit varModel4 <- omxSetParameters( varModel4, label="sd2", newlabels='sd1' ) varModel4 <- omxAssignFirstParameters(varModel4) varModel4Fit <- mxRun( varModel4, intervals=F ) varModel4Summ <- summary( varModel4Fit ) varModel4Summ mxCompare(varModel3Fit, varModel4Fit) ############################################ # H1c: Presence of scalar sex limitation. # Constrain expected covar/correlation to be equal across MZ twins, and same sex DZ twins covModel1 <- varModel4Fit covModel1 <- omxSetParameters( covModel1, label="cor2", newlabels='cor1' ) covModel1 <- omxSetParameters( covModel1, label="cor4", newlabels='cor3' ) covModel1 <- omxAssignFirstParameters(covModel1) covModel1Fit <- mxRun( covModel1, intervals=F ) covModel1Summ <- summary( covModel1Fit ) covModel1Summ mxCompare(varModel4Fit, covModel1Fit) # H2c: Presence of non scalar sex limitation. #Constrain expected covar/correlation to be equal acrosss all DZ twins. covModel2 <- covModel1Fit covModel2 <- omxSetParameters( covModel2, label="cor5", newlabels='cor3' ) covModel2 <- omxAssignFirstParameters(covModel2) covModel2Fit <- mxRun( covModel2, intervals=F ) covModel2Summ <- summary( covModel2Fit ) covModel2Summ mxCompare(covModel1Fit, covModel2Fit) # H3c: Variance on a trait is influenced by genetic factors. #Constrain expected covar/correlation to be equal across all zygosities. covModel3 <- covModel2Fit covModel3 <- omxSetParameters( covModel3, label="cor3", newlabels='cor1' ) covModel3 <- omxAssignFirstParameters(covModel3) covModel3Fit <- mxRun( covModel3, intervals=T ) covModel3Summ <- summary( covModel3Fit ) covModel3Summ mxCompare(covModel2Fit, covModel3Fit) # H4c: Presence of familial aggregation. #Constrain all covariances to zero (famillial aggregation) covModel4 <- covModel2Fit covModel4 <- omxSetParameters( covModel4, label="cor1", free=F, value=0 ) covModel4 <- omxAssignFirstParameters(covModel4) covModel4Fit <- mxRun( covModel4, intervals=F ) covModel4Summ <- summary( covModel4Fit ) covModel4Summ mxCompare(covModel2Fit, covModel4Fit)