# ----------------------------------------------------------------------- # Program: CreateSimData.R # Author: Matt Keller # Date: March 10, 2016 # Adapted by Lucia Colodro Conde, March 8 2018. # ----------------------------------------------------------------------- setwd("/home/lucia/Desktop/Friday_Hackathon") rm(list=ls(all=TRUE)) require(psych) require(OpenMx) source("miFunctions.R") mxOption(NULL,"Default optimizer","NPSOL") # ----------------------------------------------------------------------- # BEGIN HACKATHON! # ----------------------------------------------------------------------- # Starting things up # ----------------------------------------------------------------------- #Get our dataset from the web. #This is a cool R trick: your file might exist on your local machine #OR it might exist on the web! con <- "http://www.matthewckeller.com/public/all.dat2.txt" data <- read.table(con,header=TRUE) colnames(data) nms <- names(data) <- c("twintype","famid","tw1chol","tw2chol","tw1age","tw2age","tw1cholR","tw2cholR") # ----------------------------------------------------------------------- # Prepare Data # ----------------------------------------------------------------------- head(data) str(data) #get summary of each column in data summary(data) #make sure no weird outliers (e.g., -999) #check missingness sum(is.na(data)) # Check zygosity levels and frequencies. table(data$twintype) # Check if age of twin 2 is the same as age of twin 2. cor(data$tw1age,data$tw2age, use ="all") plot(data$tw1age,data$tw2age) # Creat two zygosity group for quick first check. mzData <- data[data$twintype %in% c('mzm','mzf'),3:6] dzData <- data[data$twintype %in% c('dzm','dzf','dzos'),3:6] dim(mzData) dim(dzData) #Always look at your data ! plot(mzData$tw1chol,mzData$tw2chol,main='MZ scatterplot') plot(dzData$tw1chol,dzData$tw2chol,main='DZ scatterplot') #Look at covariances round(cov(mzData[,1:2],use="complete"),3) #MZ Var/Covar 2x2 matrix round(cov(dzData[,1:2],use="complete"),3) #DZ Var/Covar 2x2 matrix #create object of MZ vs DZ cov's (cov.MZ <- cov(mzData,use="complete") [1,2]) (cov.DZ <- cov(dzData,use="complete") [1,2]) # ----------------------------------------------------------------------- # CHECK CTD ASSUMPTIONS AND EFFECT OF POTENTIAL COVARIATES (AGE) # Select Variables for Analysis nv <- 1 # number of variables ntv <- nv*2 # number of total variables selVars <- c('tw1chol' , 'tw2chol') defVars <- c('tw1age') useVars <- c(selVars,defVars) # ------------------------------------------------------------------ # PREPARE MODEL # Saturated Model # Set Starting Values StMean <- 0.5 # start value for means StSd <- 1 # start value for standard deviation StMZ <- .8 # start value for MZ correlation StDZ <- .4 # start value for DZ correlation # Age corrections # Regression effects beta1 <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=0.1, labels='betaAge', name="beta1" ) # Independent variables obsAge <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=FALSE, labels=c("data.tw1age","data.tw1age"), name="Age") inclusions <- list (beta1, obsAge) # FOR EACH ZYGOSITY GROUP # Select Data for Analysis & specify the model # Algebra for expected Mean Matrices # Building the model # Algebra for expected Variance/Covariance Matrices in MZ & DZ twins # Build Objectives for each group fitFunction <- mxFitFunctionML(rowDiagnostics=TRUE) # mzf mzfdata <- subset(data, twintype=="mzf", useVars) head(mzfdata) mzfData <- mxData( observed=mzfdata, type="raw" ) meanfMZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean1","mean1a"), name="intercept_fMZ" ) expMeanfMZ <- mxAlgebra( intercept_fMZ + beta1 %x% Age, name="regressfMZ") sdFMZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd1","sd1a"), name="sd_fMZ") corfMZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StMZ, labels="cor1",name="corfMZ" ) expCovfMZ <- mxAlgebra( sd_fMZ %&% corfMZ , name="expCovfMZ") objfMZ <- mxExpectationNormal( covariance="expCovfMZ", means="regressfMZ", dimnames=selVars ) cifMZ <- mxCI('expCovfMZ') modelfMZ <- mxModel( "fMZ", inclusions, expMeanfMZ, meanfMZ, sdFMZ, corfMZ, expCovfMZ, mzfData, objfMZ,cifMZ,fitFunction ) # mzm mzmdata <- subset(data, twintype=="mzm", useVars) mzmData <- mxData( observed=mzmdata, type="raw" ) meanmMZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean2","mean2a"), name="intercept_mMZ" ) expMeanmMZ <- mxAlgebra(intercept_mMZ + beta1 %x% Age, name="regressmMZ") sdMMZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd2","sd2a"), name="sd_mMZ") cormMZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StMZ, labels="cor2",name="cormMZ" ) expCovmMZ <- mxAlgebra( sd_mMZ %&% cormMZ , name="expCovmMZ") objmMZ <- mxExpectationNormal( covariance="expCovmMZ", means="regressmMZ", dimnames=selVars ) cimMZ <- mxCI('expCovmMZ') modelmMZ <- mxModel( "mMZ", inclusions, expMeanmMZ, meanmMZ, sdMMZ, cormMZ, expCovmMZ, mzmData, objmMZ,cimMZ,fitFunction ) # dzf dzfdata <- subset(data, twintype=="dzf", useVars) dzfData <- mxData( observed=dzfdata, type="raw" ) meanfDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean3","mean3a"), name="intercept_fDZ" ) expMeanfDZ <- mxAlgebra(intercept_fDZ + beta1 %x% Age, name="regressfDZ") sdFDZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd3","sd3a"), name="sd_fDZ") corfDZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StDZ, labels="cor3",name="corfDZ" ) expCovfDZ <- mxAlgebra( sd_fDZ %&% corfDZ , name="expCovfDZ") objfDZ <- mxExpectationNormal( covariance="expCovfDZ", means="regressfDZ", dimnames=selVars ) cifDZ <- mxCI('expCovfDZ' ) modelfDZ <- mxModel( "fDZ", inclusions, expMeanfDZ, meanfDZ, sdFDZ, corfDZ, expCovfDZ, dzfData, objfDZ,cifDZ,fitFunction ) # dzm dzmdata <- subset(data, twintype=="dzm", useVars) dzmData <- mxData( observed=dzmdata, type="raw" ) meanmDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean4","mean4a"), name="intercept_mDZ" ) expMeanmDZ <- mxAlgebra(intercept_mDZ + beta1 %x% Age, name="regressmDZ") sdMDZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd4","sd4a"), name="sd_mDZ") cormDZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StDZ, labels="cor4",name="cormDZ" ) expCovmDZ <- mxAlgebra( sd_mDZ %&% cormDZ , name="expCovmDZ") objmDZ <- mxExpectationNormal( covariance="expCovmDZ", means="regressmDZ", dimnames=selVars ) cimDZ <- mxCI( 'expCovmDZ' ) modelmDZ <- mxModel( "mDZ", inclusions, expMeanmDZ, meanmDZ, sdMDZ, cormDZ, expCovmDZ, dzmData, objmDZ ,cimDZ, fitFunction) # dzmf dzmfdata <- subset(data, twintype=="dzos", useVars) dzmfData <- mxData( observed=dzmfdata, type="raw" ) meanmfDZ <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=StMean, labels=c("mean6","mean5"), name="intercept_mfDZ" ) expMeanmfDZ <- mxAlgebra(intercept_mfDZ + beta1 %x% Age, name="regressmfDZ") sdMFDZ <- mxMatrix( type = "Diag", nrow=ntv, ncol=ntv, free=TRUE, values=StSd, labels=c("sd6","sd5"), name="sd_mfDZ") cormfDZ <- mxMatrix( type="Stand", nrow=ntv, ncol=ntv, free=TRUE, values=StDZ, labels="cor5",name="cormfDZ" ) expCovmfDZ <- mxAlgebra( sd_mfDZ %&% cormfDZ , name="expCovmfDZ") objmfDZ <- mxExpectationNormal( covariance="expCovmfDZ", means="regressmfDZ", dimnames=selVars ) cimfDZ <- mxCI('expCovmfDZ') modelmfDZ <- mxModel( "mfDZ", inclusions, expMeanmfDZ, meanmfDZ, sdMFDZ,cormfDZ, expCovmfDZ, dzmfData, objmfDZ,cimfDZ,fitFunction ) # Sum the likelihoods modelFit <- mxFitFunctionMultigroup( c("fMZ", "mMZ", "fDZ", "mDZ", "mfDZ") ) #Putting it all together baseModel <- mxModel( "base", modelfMZ, modelmMZ, modelfDZ, modelmDZ, modelmfDZ, modelFit) # ------------------------------------------------------------------------------ # RUN MODEL baseFit <- mxRun( baseModel, intervals=F ) baseSumm <- summary( baseFit ) baseSumm ################################################################################################# # SUBMODELS ################################################################################################# # H1m: Presence of birth order effects. Constrain expected Means to be equal across twin order meanmodel1 <- baseFit meanmodel1 <- omxSetParameters( meanmodel1, label="mean1a", newlabels='mean1' ) meanmodel1 <- omxSetParameters( meanmodel1, label="mean2a", newlabels='mean2' ) meanmodel1 <- omxSetParameters( meanmodel1, label="mean3a", newlabels='mean3' ) meanmodel1 <- omxSetParameters( meanmodel1, label="mean4a", newlabels='mean4' ) meanmodel1 <- omxAssignFirstParameters(meanmodel1) meanmodel1Fit <- mxRun( meanmodel1, intervals=F ) meanmodel1Summ <- summary( meanmodel1Fit ) meanmodel1Summ mxCompare(baseFit, meanmodel1Fit) # H2m: Homogeneity of means between MZ and DZ twins within like-sex twin pairs. Constrain expected Means to be equal in same-sex pairs meanmodel2 <- meanmodel1Fit meanmodel2 <- omxSetParameters( meanmodel2, label="mean1", newlabels='mean3' ) meanmodel2 <- omxSetParameters( meanmodel2, label="mean2", newlabels='mean4' ) meanmodel2 <- omxAssignFirstParameters(meanmodel2) meanmodel2Fit <- mxRun( meanmodel2, intervals=F ) meanmodel2Summ <- summary( meanmodel2Fit ) meanmodel2Summ mxCompare(meanmodel1Fit, meanmodel2Fit) # H3m: Homogeneity of means between MZ and DZ twins within all female twins and within all male twins. Constrain expected Means to be equal between same sex and opposite sex pairs # H4m: Homogeneity of means between male and female means. Constrain expected Means to be equal across sex and zygosity pair # Significance of covariate/s ############################################ ############################################ # H1v: Presence of birth order effects . Constrain expected var to be equal across twin order # H2v: Homogeneity of variances between MZ and DZ twins within like-sex twin pairs. Constrain expected var to be equal across zygosity (for same sex) # H3v: Homogeneity of variances between MZ and DZ twins within all female twins and within all male twins. Constrain expected var to be equal btw same sex and opposite sex pairs # H4v: Homogeneity of variances between male and female twins. Constrain expected var to be equal for all pairs ############################################ # H1c: Presence of scalar sex limitation. Constrain expected covar/correlation to be equal across MZ twins, and same sex DZ twins # H2c: Presence of non scalar sex limitation. Constrain expected covar/correlation to be equal acrosss all DZ twins. # H3c: Variance on a trait is influenced by genetic factors. Constrain expected covar/correlation to be equal across all zygosities. # H4c: Presence of familial aggregation. Constrain all covariances to zero (famillial aggregation)