################################################################### ## Conducting a Linear Regression in R and OpenMx ## ################################################################### # Code to run packages needed for this session. # You need to do this every time you open a new R session. library(psych) # Set working directory using server address. # Please change to conduct analyses from your folder #setwd('/home/yanyax19/2022') #setwd('G:/yanyan/openmx/2022') # Mac address example #setwd('/Users/ecpromwormle/Documents/MyDocuments/Boulder/Boulder2022') # PC address example # setwd('C:\\Users\\ecpromwormle\\Desktop\\R Training Scripts') # Open dataset files and have their contents reside in objects twins and Twins2 twins<- read.table(file='SimWtDataInd.csv',header=T,sep=",") Twins2<- read.table(file='SimWtDataPair.csv',header=T,sep=",") # Getting basic information on the object Twins2 # which contains data from the dataset SimWtDataPair.csv dim(Twins2) head(Twins2) names(Twins2) summary(Twins2) str(Twins2) ################################################ ## Introduction to OpenMx ## ################################################ # First model - 1 Individual # Visualizing the relationship between Site and Weight boxplot(WT_T1 ~ Site_T1, col = "gray", data=Twins2) # Running the regression using lm between site and weight WeightFit_T1 <- lm(WT_T1 ~ Site_T1, data=Twins2) summary(WeightFit_T1) # show results coefficients(WeightFit_T1) # model coefficients ################################################################################ # Setting up the model # Y = beta0 + beta1*age ################################################################################ library(OpenMx) mxOption(NULL,"Default optimizer","NPSOL") # Get data for Twin one member only Twins2a_1 <- subset(x=Twins2, subset= !is.na(Site_T1) , select=c(WT_T1, Site_T1)) # Define dependent variable (Y) as Weight for Twin 1 using variable name WT_T1 # The name 'WT_T1' is contained in the object depVar depVar <- 'WT_T1' # Define Variance/Covariance matrix. Pass matrix to the object eVar eVar <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, values=10, labels='resid', name="residualVar" ) # Define matrix with regression betas. Pass matrices to the objects b0 & b1 b0 <- mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE, values=50, labels="beta0T1", name="Intercept" ) b1 <- mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE, values=1, labels="beta1T1", name="bSite" ) # Define matrix with independent variable. Pass matrix to the object x x <- mxMatrix(type="Full", nrow=1, ncol=1, free=FALSE, labels="data.Site_T1", name="Site" ) # Take previously defined matrices and build the model using #mxAlgebra (y=b0+b1*Site) #%x% is matrix multiplication # Pass algebra to matrix expMean expMean <- mxAlgebra(Intercept + bSite%x%Site, name="regress") # Tell OpenMx what object contains the data that will be used for the analyses regData <- mxData( observed=Twins2a_1, type="raw" ) # List object names referenced in the model # Here, OBJECT NAMES are included - not matrix names inclusions <- list(eVar, b0, b1, x, expMean) # Finally, we call up the results of the algebras as the arguments # for the expectation function. # The dimnames map the data to the model. # NOTE- The MATRIX NAMES defined in mxMatrix() statements are used # here NOT the objects that are used to store the matrices. exp <- mxExpectationNormal( covariance="residualVar", means="regress", dimnames=depVar ) # The fit function declares that the model is fit using maximum likelihood. # When combined with raw data this means full information # maximum likelihood (FIML) is optimized. funML <- mxFitFunctionML() # Build the Model # Specify the name of the model, the objects referenced, the data # and the objective regModel <- mxModel( "Regression101", inclusions, regData, exp, funML ) regFit <- mxRun( regModel, intervals=FALSE) # See summary results from regFit right now ( regSum <- summary( regFit ) ) #for the results The Akaike information criterion (AIC) and the Bayesian information criterion (BIC) #provide measures of model performance that account for model complexity. AIC and BIC combine a #term reflecting how well the model fits the data with a term that penalizes the model in #proportion to its number of parameters. The model with the lowest AIC offers the best fit. # Lower BIC value indicates lower penalty terms hence a better model. # looking at the object that contains the output in more depth regFit ( regVerbose <- summary( regFit, verbose=TRUE ) ) # ReRunning to look at optimization regModel <- mxOption(regModel,"Checkpoint Units", "iterations") regModel <- mxOption(regModel,"Checkpoint Count", 1) regFit <- mxRun( regModel, intervals=FALSE, checkpoint=T ) ########################################################### # SUBMODELS ########################################################### # Go back and pickup the model so that we can run significance tests # check for sig effects of Site # Run a submodel that does not include the effect of site and compare # the difference in model fit between a model with the effect of site estimated versus one without SiteEfModel <- regFit SiteEfModel <- omxSetParameters( SiteEfModel, label="beta1T1", free=FALSE, values=0 ) SiteEfFit <- mxRun(SiteEfModel, intervals=FALSE) # Use summary() function to report a summary of the results from the object SiteEFit. # Pass the summary results to be stored in an object called SiteEfSumm # Automatically see summary results by starting/ending entire line with parentheses. (SiteEfSumm <- summary(SiteEfFit)) # Test difference in fit to determine the significance of Site. (deltaLL <-SiteEfSumm$Minus2LogLikelihood - regSum$Minus2LogLikelihood) # difference in df (deltaDF <-SiteEfSumm$degreesOfFreedom - regSum$degreesOfFreedom) # significance test (pchisq(deltaLL, lower.tail=F, deltaDF)) #R2 (R2<-SiteEfFit$residualVar@values-regFit$residualVar@values) ######################################################################### # MORE EXPLORATION OF RESULTS ######################################################################### # Want to explore the results from the model a bit more? SiteEfFit names(SiteEfFit) # Take a look at the matrices that were used in the model SiteEfFit$matrices # Take a look at the formula used to calculate the model and the result SiteEfFit$algebras # Pull out the specific value of the result SiteEfFit$algebras$regress$result # See some of the data that went into the model SiteEfFit$data ################################################################## # Second OpenMx model - Add a cotwin ################################################################## # using subset function create new dataset without missing data Twins2a_2 <- subset(x=Twins2, subset= !is.na(Site_T1) & !is.na(Site_T2) , select=c(WT_T1, WT_T2, Site_T1, Site_T2)) # Identify the phenotypes of study (weight of twin 1 and twin 2) depVar <- c('WT_T1','WT_T2') # Variance/Covariance matrix eVar <- mxMatrix( type="Symm", nrow=2, ncol=2, free=TRUE, values=c(10,1,10), labels=c('varT1','covT12','varT2'), name="residualVar" ) # Regression betas b0 <-mxMatrix(type="Full", nrow=1, ncol=2, free=T, values=50, labels=c("beta0T1","beta0T2"), name="Intercept" ) b1 <-mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=0, labels="beta1", name="bSite" ) # Independent variable x <-mxMatrix(type="Full", nrow=1, ncol=2, free=F, labels=c("data.Site_T1", "data.Site_T2"), name="Site" ) # Building the model (y=b0+b1*Site) expMean <- mxAlgebra(Intercept + bSite%x%Site, name="regress") # Specify the data regData <- mxData( observed=Twins2a_2, type="raw" ) # List objects referenced in the model # NOTE - object names have been included here, not matrix names inclusions <- list(eVar, b0, b1, x, expMean) # Finally, we call up the results of the algebras as the arguments # for the expectation function. # The dimnames map the data to the model. # NOTE- The matrix names are defined in mxMatrix() statements # are used here NOT the objects that are used to store the matrices. exp <- mxExpectationNormal( covariance="residualVar", means="regress", dimnames=depVar ) #The fit function declares that the model is fit using maximum likelihood. # When combined with raw data this means full information # maximum likelihood (FIML) is optimized. funML <- mxFitFunctionML() #Build the model #specify the name of the model, the objects referenced, the data # and the objective regModel2 <- mxModel( "Regression201", inclusions, regData, exp, funML ) regFit2 <- mxRun( regModel2, intervals=FALSE) ( regSum2 <- summary( regFit2 ) ) ########################################################################### ####################### OpenMx Resources ################################# # OpenMx website # https://openmx.ssri.psu.edu/ # Wiki Page # https://openmx.ssri.psu.edu/wiki/main-page # Discussion Forum # https://openmx.ssri.psu.edu/forums # OpenMx YouTube Videos # https://www.youtube.com/playlist?list=PL-eLmo9qXe_SfwGcClW41gUXoL2b7U1zT # ########################################################################## ##########################################################################