# Load the necessary packages
require(MASS)
require(OpenMx)
require(psych)

# load the functions to do the analyses 
source("lgcFun.R")

# Set your working directory
setwd("You_Must_Change_This_Text")

# Read in the data
lgcData_1 <- as.data.frame(read.table("lgcData_1.txt", header = T))
lgcData_2 <- as.data.frame(read.table("lgcData_2.txt", header = T))
lgcData_3 <- as.data.frame(read.table("lgcData_3.txt", header = T))

# Look at the data 
head(lgcData_1)
head(lgcData_2)
head(lgcData_3)

# Create two data objects (one for MZ and another for DZ twin families)
mzData_1 <- lgcData_1[lgcData_1$zyg == "MZ" ,]
dzData_1 <- lgcData_1[lgcData_1$zyg == "DZ" ,]

mzData_2 <- lgcData_2[lgcData_2$zyg == "MZ" ,]
dzData_2 <- lgcData_2[lgcData_2$zyg == "DZ" ,]

mzData_3 <- lgcData_3[lgcData_3$zyg == "MZ" ,]
dzData_3 <- lgcData_3[lgcData_3$zyg == "DZ" ,]


## Next we need to tell R what variables we plan on analyzing. 
# There are 6 waves and two twins. 
# Each wave is labels [trial1, trial2, trial3 ...]. 
# Twins are identified by a suffix _1 or _2 [e.g. "trial1_1" indicates for wave 1 for twin 1 or "trial4_2" indicataes wave 4 for twin 2]

selVars <- colnames(lgcData_1)[1:12]
selVars

dataMZ <- mzData_1
dataDZ <- dzData_1

# Run the Linear ACE LGM model
linACElgcModel <- linACElgc(mzData = dataMZ, dzData = dataDZ, selVars  = selVars, nWaves = 6)
linACElgcFit <- mxRun(linACElgcModel)
linACElgcSumm <- summary(linACElgcFit )
linACElgcSumm

# Run the Quadratic ACE LGM model
quadACElgcModel <- quadACElgc(mzData = dataMZ, dzData = dataDZ, selVars  = selVars, nWaves = 6)
quadACElgcFit <- mxRun(quadACElgcModel)
quadACElgcFit <- mxRun(quadACElgcModel)
quadACElgcSumm <- summary(quadACElgcFit )
quadACElgcSumm

# Run the Semiparametric ACE LGM model 
semiACElgcModel <- semiACElgc(mzData = dataMZ, dzData = dataDZ, selVars  = selVars, nWaves = 6)
semiACElgcFit <- mxRun(semiACElgcModel)
semiACElgcSumm <- summary(semiACElgcFit )
semiACElgcSumm

# Compare the models and determine which model fits best for each datset.
InfoCriteria <- rbind(
c(t(linACElgcSumm$informationCriteria))      ,
c(t(quadACElgcSumm$informationCriteria))     ,
c(t(semiACElgcSumm$informationCriteria))     )

colnames(InfoCriteria) <- c("df Penalty  AIC", "Parameters Penalty AIC", "Sample-Size Adjusted AIC", "df Penalty  BIC", "Parameters Penalty BIC", "Sample-Size Adjusted BIC")
rownames(InfoCriteria) <- c("Linear LGC", "Quadratic LGC", "Semiparametric LGC")
InfoCriteria

####

# While the fit statistics make a compelling case for selecting the best fitting model, it is instructive to plot the expected means for each model to understand WHY one model fits better than another.

# Make a figure to visualize the differences in the expected means of the growth parameters 

# reshape the data so that you can get a plot of several observed trajectories
nWaves <- 6
mzT1 <- dataMZ[,1:6]  ; colnames(mzT1) <- paste0("trial", 1:nWaves)
mzT2 <- dataMZ[,7:12] ; colnames(mzT2) <- paste0("trial", 1:nWaves)
dzT1 <- dataMZ[,1:6]  ; colnames(dzT1) <- paste0("trial", 1:nWaves)
dzT2 <- dataMZ[,7:12] ; colnames(dzT2) <- paste0("trial", 1:nWaves)

longData <- rbind(mzT1, dzT1, mzT2, dzT2)


# Make a figure to visualize the how the growth parameters affect the observed means for each wave of your data
linMeans  <- t(linACElgcFit$output$matrices$lgcLinACE.Mean) %*% t(linACElgcFit$output$matrices$lgcLinACE.lambda)
quadMeans <- t(quadACElgcFit$output$matrices$lgcQuadACE.Mean) %*% t(quadACElgcFit$output$matrices$lgcQuadACE.lambda)
semiMeans <- t(semiACElgcFit$output$matrices$semilgcACE.Mean) %*% t(semiACElgcFit$output$matrices$semilgcACE.lambda)

# Generate a blank plot (with the basic parameters that you want)
# Change the parameters of the graph to make it "pretty"
plot(1:6,linMeans, ylab = "DV", xlab = "Wave", ylim = c(0,75), type = "b", 
main = "Trajectories of the Different Growth Models for Dataset 1", lwd = 3, col = "white")

# randomly sample and plot 200 trajectories (you can do more if you want but this gives a good picture IMHO)
for(i in 1:200){
	lines(1:6,longData[sample(1:nrow(longData),1), paste0("trial", 1:nWaves)], ylab = " ", xlab = " ", col = i , lwd = .5)
}

# Plot the means (hint: to them one at a time)
lines(1:6,linMeans, type = "b",  lwd = 3, col = "black")
lines(1:6,quadMeans, type = "b",  lwd = 3, col = "red")
lines(1:6,semiMeans, type = "b",  lwd = 3, col = "blue")

legend("topleft", lwd = 3, 
       legend = c("Linear LGC", "Quadratic LGC", "Semiparametric LGC"), 
	   col = c("black", "red", "blue") )


################################################################################################
###        Pick one of the datasets and interpret the parameters of the best fitting.        ###
################################################################################################

################################################################################################
# Interpreting the Quadratic LGC Model
################################################################################################

selVars <- colnames(lgcData_1)[1:12]
dataMZ <- mzData_1
dataDZ <- dzData_1

# Run the Quadratic ACE LGM model
quadACElgcModel <- quadACElgc(mzData = dataMZ, dzData = dataDZ, selVars  = selVars, nWaves = 6)
quadACElgcFit <- mxRun(quadACElgcModel)
quadACElgcSumm <- summary(quadACElgcFit )
quadACElgcSumm

# The summary of the parameters estimates is very useful for providing an overview of the model and other information to assess the quality of the model. To really understand what is happening, we need to look at the estimates inside the model in more detail. I find it is easier to do that with reference to the specific matrices.

# Means of the latent growth factors 

# How can we get the means for the latent growth factors?
# How can we calculate the expected means for each waves?
# How would you interpret these parameters?


# What is the covariances between the latent intercept and the latent linear growth factor?
# What is the correlation between the latent intercept and the latent linear growth factor?


# How would we interpret the correlations between the latent intercept and the linear slope factors?

# What proportion of the variance in the latent growth factors is accounted for by A, C or E factors for each growth parameter?

# What proportion of the phenotypic variance accounted for by specific A, C or E factors/residuals


################################################################################################
# Interpreting the Linear LGC Model
################################################################################################

selVars <- colnames(lgcData_2)[1:12]
dataMZ <- mzData_2
dataDZ <- dzData_2

# Run the Linear ACE LGM model
linACElgcModel <- linACElgc(mzData = dataMZ, dzData = dataDZ, selVars  = selVars, nWaves = 6)
linACElgcFit <- mxRun(linACElgcModel)
linACElgcSumm <- summary(linACElgcFit )
linACElgcSumm

# The summary of the parameters estimates is very useful for providing an overview of the model and other information to assess the quality of the model. To really understand what is happening, we need to look at the estimates inside the model in more detail. I find it is easier to do that with reference to the specific matrices.

# Means of the latent growth factors 

# How can we get the means for the latent growth factors?
# How can we calculate the expected means for each waves?
# How would you interpret these parameters?


# What are the covariances between the latent growth factors?
# What are the correlations between the latent growth factors?

# How would we interpret the correlations between the latent intercept and the linear slope factors?

# What proportion of the variance in the latent growth factors is accounted for by A, C or E factors for each growth parameter?

# What proportion of the phenotypic variance accounted for by specific A, C or E factors/residuals


################################################################################################
# Interpreting the Semiparametric LGC Model
################################################################################################

selVars <- colnames(lgcData_3)[1:12]
dataMZ <- mzData_3
dataDZ <- dzData_3

# Run the Semiparametric ACE LGM model 
semiACElgcModel <- semiACElgc(mzData = dataMZ, dzData = dataDZ, selVars  = selVars, nWaves = 6)
semiACElgcFit <- mxRun(semiACElgcModel)
semiACElgcSumm <- summary(semiACElgcFit )
semiACElgcSumm

# The summary of the parameters estimates is very useful for providing an overview of the model and other information to assess the quality of the model. To really understand what is happening, we need to look at the estimates inside the model in more detail. I find it is easier to do that with reference to the specific matrices.

# Means of the latent growth factors 

# How can we get the means for the latent growth factors?
# How can we calculate the expected means for each waves?
# How would you interpret these parameters?


# What are the covariances between the latent growth factors?
# What are the correlations between the latent growth factors?

# How would we interpret the correlations between the latent intercept and the linear slope factors?

# What proportion of the variance in the latent growth factors is accounted for by A, C or E factors for each growth parameter?

# What proportion of the phenotypic variance accounted for by specific A, C or E factors/residuals

# How would you interpret the factor loadings of the semiparametric model?