###################################################################
##        Conducting a Linear Regression in R and OpenMx         ##
###################################################################

# Code to run packages needed for this session. 
# You need to do this every time you open a new R session.
library(psych)

# Set working directory using server address. 
# Please change to conduct analyses from your folder
#setwd('/home/yanyax19/2022')
#setwd('G:/yanyan/openmx/2022')

# Mac address example
#setwd('/Users/ecpromwormle/Documents/MyDocuments/Boulder/Boulder2022')

# PC address example
# setwd('C:\\Users\\ecpromwormle\\Desktop\\R Training Scripts')

# Open dataset files and have their contents reside in objects twins and Twins2
twins<- read.table(file='SimWtDataInd.csv',header=T,sep=",")
Twins2<- read.table(file='SimWtDataPair.csv',header=T,sep=",")

# Getting basic information on the object Twins2 
# which contains data from the dataset SimWtDataPair.csv
dim(Twins2)
head(Twins2)
names(Twins2)
summary(Twins2)
str(Twins2)


################################################
##        Introduction to OpenMx              ##
################################################
# First model - 1 Individual

# Visualizing the relationship between Site and Weight
boxplot(WT_T1 ~ Site_T1, col = "gray", data=Twins2)

# Running the regression using lm between site and weight
WeightFit_T1 <- lm(WT_T1 ~ Site_T1, data=Twins2)
summary(WeightFit_T1) # show results
coefficients(WeightFit_T1) # model coefficients


################################################################################
# Setting up the model 
# Y = beta0 + beta1*age 
################################################################################

library(OpenMx)
mxOption(NULL,"Default optimizer","NPSOL")

# Get data for Twin one member only
Twins2a_1 <- subset(x=Twins2, subset= !is.na(Site_T1) , 
                    select=c(WT_T1, Site_T1)) 

# Define dependent variable (Y) as Weight for Twin 1 using variable name WT_T1
# The name 'WT_T1' is contained in the object depVar
depVar <- 'WT_T1'

# Define Variance/Covariance matrix. Pass matrix to the object eVar
eVar     <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, 
                          values=10, labels='resid', name="residualVar" )

# Define matrix with regression betas. Pass matrices to the objects b0 & b1
b0			<- mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE, values=50,
                labels="beta0T1", name="Intercept" )
b1			<- mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE, values=1,
                labels="beta1T1", name="bSite" )

# Define matrix with independent variable. Pass matrix to the object x
x			<- mxMatrix(type="Full", nrow=1, ncol=1, free=FALSE,
               labels="data.Site_T1", name="Site" )

# Take previously defined matrices and build the model using
#mxAlgebra (y=b0+b1*Site)
#%x% is matrix multiplication
# Pass algebra to matrix expMean
expMean      <- mxAlgebra(Intercept + bSite%x%Site, name="regress")

# Tell OpenMx what object contains the data that will be used for the analyses
regData      <- mxData( observed=Twins2a_1, type="raw" )

# List object names referenced in the model
# Here, OBJECT NAMES are included - not matrix names
inclusions   <- list(eVar, b0,  b1, x, expMean)

# Finally, we call up the results of the algebras as the arguments 
# for the expectation function. 
# The dimnames map the data to the model.  
# NOTE- The MATRIX NAMES defined in mxMatrix() statements are used 
# here NOT the objects that are used to store the matrices.
exp         <- mxExpectationNormal( covariance="residualVar", 
                                    means="regress",
                                    dimnames=depVar )

# The fit function declares that the model is fit using maximum likelihood.
# When combined with raw data this means full information 
# maximum likelihood (FIML) is optimized.
funML       <- mxFitFunctionML()

# Build the Model 
# Specify the name of the model, the objects referenced, the data 
# and the objective
regModel     <- mxModel( "Regression101", inclusions, regData, exp, funML )
regFit       <- mxRun( regModel, intervals=FALSE)

# See summary results from regFit right now
( regSum    <- summary( regFit ) )

#for the results The Akaike information criterion (AIC) and the Bayesian information criterion (BIC)
#provide measures of model performance that account for model complexity. AIC and BIC combine a
#term reflecting how well the model fits the data with a term that penalizes the model in 
#proportion to its number of parameters. The model with the lowest AIC offers the best fit. 
# Lower BIC value indicates lower penalty terms hence a better model.


# looking at the object that contains the output in more depth
regFit

( regVerbose    <- summary( regFit, verbose=TRUE ) )


# ReRunning to look at optimization
regModel     <- mxOption(regModel,"Checkpoint Units", "iterations")
regModel     <- mxOption(regModel,"Checkpoint Count", 1)
regFit       <- mxRun( regModel, intervals=FALSE, checkpoint=T )

###########################################################
# SUBMODELS
###########################################################

# Go back and pickup the model so that we can run significance tests
# check for sig effects of Site
# Run a submodel that does not include the effect of site and compare 
# the difference in model fit between a model with the effect of site estimated versus one without
SiteEfModel    <- regFit
SiteEfModel    <- omxSetParameters( SiteEfModel, label="beta1T1", 
                                    free=FALSE, values=0 )
SiteEfFit      <- mxRun(SiteEfModel, intervals=FALSE)

# Use summary() function to report a summary of the results from the object SiteEFit.
# Pass the summary results to be stored in an object called SiteEfSumm
# Automatically see summary results by starting/ending entire line with parentheses.
(SiteEfSumm     <- summary(SiteEfFit))

# Test difference in fit to determine the significance of Site.
(deltaLL <-SiteEfSumm$Minus2LogLikelihood - regSum$Minus2LogLikelihood)

# difference in df
(deltaDF <-SiteEfSumm$degreesOfFreedom - regSum$degreesOfFreedom)

# significance test
(pchisq(deltaLL, lower.tail=F, deltaDF))

#R2
(R2<-SiteEfFit$residualVar@values-regFit$residualVar@values)

#########################################################################
# MORE EXPLORATION OF RESULTS
#########################################################################
# Want to explore the results from the model a bit more?
SiteEfFit
names(SiteEfFit)

# Take a look at the matrices that were used in the model
SiteEfFit$matrices

# Take a look at the formula used to calculate the model and the result
SiteEfFit$algebras

# Pull out the specific value of the result
SiteEfFit$algebras$regress$result

# See some of the data that went into the model
SiteEfFit$data

##################################################################
# Second OpenMx model - Add a cotwin
##################################################################

# using subset function create new dataset without missing data
Twins2a_2 <- subset(x=Twins2, subset= !is.na(Site_T1) & !is.na(Site_T2) , 
                    select=c(WT_T1, WT_T2, Site_T1, Site_T2)) 

# Identify the phenotypes of study (weight of twin 1 and twin 2)
depVar <- c('WT_T1','WT_T2')

# Variance/Covariance matrix
eVar     <- mxMatrix( type="Symm", nrow=2, ncol=2, free=TRUE, 
                          values=c(10,1,10), 
                          labels=c('varT1','covT12','varT2'), 
                          name="residualVar" )

# Regression betas
b0			<-mxMatrix(type="Full", nrow=1, ncol=2, free=T, values=50,
                labels=c("beta0T1","beta0T2"), name="Intercept" )
b1			<-mxMatrix(type="Full", nrow=1, ncol=1, free=T, values=0,
                labels="beta1", name="bSite" )

# Independent variable
x			<-mxMatrix(type="Full", nrow=1, ncol=2, free=F,
               labels=c("data.Site_T1", "data.Site_T2"), name="Site" )

# Building the model  (y=b0+b1*Site)
expMean      <- mxAlgebra(Intercept + bSite%x%Site, name="regress")

# Specify the data
regData      <- mxData( observed=Twins2a_2, type="raw" )

# List objects referenced in the model
# NOTE - object names have been included here, not matrix names
inclusions   <- list(eVar, b0,  b1, x, expMean)

# Finally, we call up the results of the algebras as the arguments 
# for the expectation function. 
# The dimnames map the data to the model.  
# NOTE- The matrix names are defined in mxMatrix() statements 
# are used here NOT the objects that are used to store the matrices.
exp         <- mxExpectationNormal( covariance="residualVar", 
                                    means="regress",
                                    dimnames=depVar )

#The fit function declares that the model is fit using maximum likelihood.
# When combined with raw data this means full information 
# maximum likelihood (FIML) is optimized.
funML       <- mxFitFunctionML()

#Build the model 
#specify the name of the model, the objects referenced, the data 
# and the objective
regModel2     <- mxModel( "Regression201", inclusions, regData, 
                          exp, funML )
regFit2       <- mxRun( regModel2, intervals=FALSE)

( regSum2    <- summary( regFit2 ) )


###########################################################################
#######################  OpenMx Resources #################################

# OpenMx website
# https://openmx.ssri.psu.edu/
# Wiki Page
# https://openmx.ssri.psu.edu/wiki/main-page
# Discussion Forum
# https://openmx.ssri.psu.edu/forums
# OpenMx YouTube Videos
# https://www.youtube.com/playlist?list=PL-eLmo9qXe_SfwGcClW41gUXoL2b7U1zT
#
##########################################################################
##########################################################################