# TABLE OF CONTENTS
# Data simulation
# Dual change score - Genetically informative


# Data simulation

require(OpenMx)

set.seed(12345)
##$#$#$#$#$#$#$#$
 # Latent Growth Simplex Model (i.e. Dual Change Score) on 5 Repeatedly Measured Latent Traits
 nVariables <- 5
 nFactors   <- 5
 nSubjects  <- 1500
 nGrow      <- 3
 
 # The covariance of y is lambda.y %*% solve(I-B)%*% (gamma%*%phi%*%gamma + psi) %*% t(solve(I-B))%*%t(lambda.y) + theta.epsilon
 
# Specify the lambda.y matrix - Factor loadings on the latent factors
lambda.y  	<- diag(5)
 
# Specify the I-B^-1  - The causal pathways between the latent factors

I5 <- diag(5)
B <- matrix(c(0,.8,0,0,0,                             # You can change these values
 	          0,0,.8,0,0,
 	          0,0,0,.8,0,
 	          0,0,0,0,.8,
 		      0,0,0,0,0),5)
 
# Specify gamma - Factor loadings on the exogenous variables (in this case growth parameters)
gamma  		<- matrix(c(1,1,1,1,1,
 	              -2,-1,0,1,2,
 	               2,-1,-2,-1,2),5,3)

# Specify phi - covariance of the exogenous variables
phiChol 	<- matrix(c(.9,.4,.1, .4,.7,.2, .1,.2,.4),3 )     # You can change these values
phi     	<- phiChol%*% t(phiChol)
cov2cor(phi)
#.9,.4,.1,
#.4,.7,.2,
#.1,.2,.4

#Specify psi - residuals of the latent factors 
psi <- .4 * diag(5)                                    # You can change these values
psi[1,1]	<- 0

# Specify theta.epsilon - the residuals of the manifest variables
theta.epsilon <- .5 * diag(nVariables)                # You can change these values

impCov <- lambda.y %*% solve(I5-B)%*% (gamma%*%phi%*%t(gamma) + psi) %*% t(solve(I5-B))%*%t(lambda.y) + theta.epsilon

h2 <- .5
c2 <- .25

MZ <- matrix(c(1, (h2 + c2), (h2+c2),1 ), 2)
DZ <- matrix(c(1, (.5*h2 + c2), (.5*h2+c2),1 ), 2)

impCovMZ <- MZ %x% impCov 
impCovDZ <- DZ %x% impCov 


 LatGrowMean <- matrix(c(2, .8, -.2),3,1)               # You can change these values
 LatFacMean <- solve(I5-B)%*%gamma %*% LatGrowMean
 OBSMeans <- lambda.y %*% LatFacMean 
 
 
 set.seed(2016)
 MZdata <- mvrnorm(nSubjects, mu = c(OBSMeans, OBSMeans), Sigma = impCovMZ, empirical = F)
 colnames(MZdata) <- c(paste("t1", 1:5, sep = "_"), paste("t2", 1:5, sep = "_"))

 set.seed(2016)
 DZdata <- mvrnorm(nSubjects, mu = c(OBSMeans, OBSMeans), Sigma = impCovDZ, empirical = F)
 colnames(DZdata) <- c(paste("t1", 1:5, sep = "_"), paste("t2", 1:5, sep = "_"))
 
# write.table(DZdata, "dz_longitudinal_dcs_data.txt")
# write.table(MZdata, "mz_longitudinal_dcs_data.txt")
 
 
# Dual change score - Genetically informative

mxOption(NULL, 'Default optimizer', 'NPSOL')
#mxOption(NULL, 'Default optimizer', 'SLSQP')

#Read data
#setwd("/Users/ngillespie/Documents/work/papers/dual_change_score/")
#data  <-  read.table("data.txt", header=TRUE, na.strings=".")
#head(data)

nvars 		<- 5		# Number of manifest/observed variables
ntvars 		<- nvars*2	#
nFactors	<- 5		# Number of latent factor true scores
nGrow   	<- 3		# Number of growth parameters: intercept; linear; & quadratic rates of change 

# Select Data
# See DualChangeSim.R for simulated longitudinal twin pair data
selVars     <- c("t1_1", "t1_2", "t1_3", "t1_4", "t1_5", "t2_1", "t2_2", "t2_3", "t2_4", "t2_5")
# mzData   <- subset(data, zyg==1, selVars)
# dzData   <- subset(data, zyg==3, selVars)
head(MZdata)			# Based on DualChangeSim.R
head(DZdata)			# Based on DualChangeSim.R

# Print Descriptive Statistics
round(colMeans(MZdata,na.rm=TRUE),2)
round(colMeans(DZdata,na.rm=TRUE),4)
round(cor(MZdata,use="complete"),4)
round(cor(DZdata,use="complete"),2)[1:5,1:5]
round(cor(DZdata,use="complete"),4)[6:10,6:10]

# Matrices in the full DCS model

# ϕ phi 	= Cholesky Decomposition pathway coefficients
# Γ gamma   = factor loadings from growth parameters to true scores / latent factors
# Ψ psi 	= innovations, residuals for the latent factor true scores (variance not account by growth factors)
# Β beta 	= auto-regression pathway
# I identity
# Λ lamba   = factor loadings from true scores / latent factors to observed variable
# ϵ epsilon = meaurement error

# phi
# covariance between the exogenous variables i.e. between Intercept, Slope, & Quadractic latent factors
chol_a		<- c("a11","a21","a31","a22","a32","a33")
chol_c		<- c("c11","c21","c31","c22","c32","c33")
chol_e		<- c("e11","e21","e31","e22","e32","e33")
phi_chol_a	<- mxMatrix(type = "Lower", nrow = nGrow, ncol = nGrow, 
				free = T, values = c(.6,.4,.4,.3,.3,.1), 
				labels = chol_a, name = "phi_chol_a")			
phi_chol_c	<- mxMatrix(type = "Lower", nrow = nGrow, ncol = nGrow, free = T, values = c(.1,.1,.1,.1,.1,.1), labels = chol_c, name = "phi_chol_c")
phi_chol_e	<- mxMatrix(type = "Lower", nrow = nGrow, ncol = nGrow, free = T, values = c(.6,.4,.4,.3,.3,.1), labels = chol_e, name = "phi_chol_e")
phiA     	<- mxAlgebra(phi_chol_a%*% t(phi_chol_a), name = "phiA")	# Variance/covariance matrix for 'A' I,S & Q
phiC     	<- mxAlgebra(phi_chol_c%*% t(phi_chol_c), name = "phiC")	# Variance/covariance matrix for 'C' I,S & Q
phiE     	<- mxAlgebra(phi_chol_e%*% t(phi_chol_e), name = "phiE")	# Variance/covariance matrix for 'E' I,S & Q
corrA 		<- mxAlgebra( expression=cov2cor(phiA), name="corrA" )		# A latent factor correlations
corrC 		<- mxAlgebra( expression=cov2cor(phiC), name="corrC" )		# C latent factor correlations
corrE 		<- mxAlgebra( expression=cov2cor(phiE), name="corrE" )		# E latent factor correlations


# gamma
# factor loadings on the exogenous variables (intercept & growth parameters)
# factor loadings from Intercept, Slope, & Quadractic to latent true scores
# 5 x 3 matrix
Glabs   	<- c(paste("i",1:nVariables,sep=""), paste("s",1:nVariables,sep=""), paste("q",1:nVariables,sep=""))
gamma    	<- mxMatrix( type="Full", nrow=nFactors, ncol=nGrow, 
				free=F, values=c(1,1,1,1,1, -2,-1,0,1,2, 2,-1,-2,-1,2), 
				labels=Glabs, name="gamma" ) 

# psi
# residuals of the latent factor true scores (variance not account by the growth factors)
# innovations in the auto-regression model
	# NB: direction of causation goes DOWN the column & OUT along the row
psi_lab_a	<- c("ia11","ia22","ia33","ia44","ia55")
psi_lab_c	<- c("ic11","ic22","ic33","ic44","ic55")
psi_lab_e	<- c("ie11","ie22","ie33","ie44","ie55")
psi_a 		<- mxMatrix(type = "Diag", nrow = nFactors, ncol = nFactors, 
						free = c(F,T,T,T,T), labels = psi_lab_a, 
						values = c(0,1,1,1,1), name = "psi_a")	
psi_c 		<- mxMatrix(type = "Diag", nrow = nFactors, ncol = nFactors, free = c(F,T,T,T,T), labels = psi_lab_c, values = c(0,1,1,1,1), name = "psi_c")
psi_e 		<- mxMatrix(type = "Diag", nrow = nFactors, ncol = nFactors, free = c(F,T,T,T,T), labels = psi_lab_e, values = c(0,1,1,1,1), name = "psi_e")
			
# beta
# causal regression coefficients
betaF   	<- matrix(c(F,T,F,F,F,  F,F,T,F,F,  F,F,F,T,F,  F,F,F,F,T,  F,F,F,F,F),5) 		# Free elements
betaS   	<- matrix(c(0,.5,0,0,0,   0,0,.5,0,0,   0,0,0,.5,0,   0,0,0,0,.5,   0,0,0,0,0),5)	# Start values
betalab 	<- matrix(c(NA,"b",NA,NA,NA,   NA,NA,"b",NA,NA,   NA,NA,NA,"b",NA,   NA,NA,NA,NA,"b",   NA,NA,NA,NA,NA),5)		   
beta    	<- mxMatrix(type="Full", nrow = nFactors, ncol = nFactors, 
				free = betaF, values = betaS, 
				ubound = 1, lbound = -1, 
				labels = betalab, name = "beta")	
ci			<- mxCI(c("b"))
I       	<- mxMatrix(type="Iden", nrow = nFactors, ncol = nFactors, name = "I")

# lamba
# factor loadings
loadS 		<- diag(nFactors)
loadF 		<- F
lamba   	<- mxMatrix(type="Full", nrow = nVariables, ncol = nFactors, 
				free = loadF, values = loadS, name = "lamba")

# Epsilon
# measurement error/residuals for each observed/manifest items/variable
epsilon_a 	<- mxMatrix(type = "Diag", nrow = nVariables, ncol = nVariables, 
				free = T, labels = "res_a", 
				values = 1, name = "epsilon_a")				
epsilon_c 	<- mxMatrix(type = "Diag", nrow = nVariables, ncol = nVariables, free = T, labels = "res_c" ,values = 1, name = "epsilon_c")
epsilon_e 	<- mxMatrix(type = "Diag", nrow = nVariables, ncol = nVariables, free = T, labels = "res_e" ,values = 1, name = "epsilon_e")

# Expected covariance
# Λ * (I-Β)~ * (Γ * ϕ ' Γ' + Ψ) * (I-Β)~' * Λ' + ϵ
# expCov <- mxAlgebra(Λ %*% solve(I-Β)%*% (Γ %*% ϕ %*% t(Γ) + Ψ) %*% t(solve(I-Β))%*%t(Λ) + ϵ, name = "expCov")
A 			<- mxAlgebra(lamba %*% solve(I-beta)%*% (gamma %*% phiA %*% t(gamma) + psi_a) %*% t(solve(I-beta))%*%t(lamba) + epsilon_a, name = "A")
C 			<- mxAlgebra(lamba %*% solve(I-beta)%*% (gamma %*% phiC %*% t(gamma) + psi_c) %*% t(solve(I-beta))%*%t(lamba) + epsilon_c, name = "C")
E 			<- mxAlgebra(lamba %*% solve(I-beta)%*% (gamma %*% phiE %*% t(gamma) + psi_e) %*% t(solve(I-beta))%*%t(lamba) + epsilon_e, name = "E")
V      		<- mxAlgebra( expression= A+C+E, name="V" )
covMZ    	<- mxAlgebra( expression= rbind( cbind(A+C+E, A+C), 
											 cbind(A+C  , A+C+E)), name="expCovMZ" )										 
covDZ    	<- mxAlgebra( expression= rbind( cbind(A+C+E,     0.5%x%A+C), 
											 cbind(0.5%x%A+C, A+C+E)), name="expCovDZ" )

# Growth parameter means
# (Λ * ( (I-Β)~ * Γ * μ ) )'
GroMean 	<- mxMatrix(type="Full", nrow = nGrow, ncol = 1, free = T, 
						values = c(2,.5,-.2), labels=c("ui","us","uq"), name = "GroMean")
FacMean  	<- mxAlgebra(solve(I-beta) %*% gamma %*% GroMean, name = "FacMean", dimnames = list(paste("F",1:5, sep=""),"FacMeans"))
ManMean  	<- mxAlgebra(t(lamba %*% FacMean), "ManMean")
expMean  	<- mxAlgebra( expression= cbind(ManMean, ManMean), name="expMean" ) # Expected means for twin pair

# Data Objects for Multiple Groups
dataMZ   	<- mxData( observed=MZdata, type="raw" )
dataDZ   	<- mxData( observed=DZdata, type="raw" )

# Objective Objects for Multiple Groups
# Define how the model expectations are calculated 
# The mxExpectationNormal function uses the algebra defined by the 'covariance' and 'means' arguments to define 
# the expected covariance and means under the assumption of multivariate normality
objMZ    	<- mxExpectationNormal( covariance="expCovMZ", means="expMean", dimnames=selVars )
objDZ    	<- mxExpectationNormal( covariance="expCovDZ", means="expMean", dimnames=selVars )

# Function to compute -2*(log likelihood) of the data given the current values of the free parameters and the expectation function
fiML    	<- mxFitFunctionML()

# Standardized variance Components
rowVC     <- rep('VC',nvars)
colVC     <- rep(c('A','C','E','SA','SC','SE'),each=nvars)
estVC     <- mxAlgebra( expression=cbind(A,C,E,A/V,C/V,E/V), name="VC", dimnames=list(rowVC,colVC))

# Final steps
pars    	<- list( gamma, lamba, I, beta, ci, psi_a, psi_c, psi_e, phi_chol_a, phi_chol_c, phi_chol_e, phiA, phiC, phiE, 
					 corrA, corrC, corrE, epsilon_a, epsilon_c, epsilon_e, A, C, E, V, GroMean,FacMean,ManMean,expMean,
					 estVC)
modelMZ 	<- mxModel( pars, dataMZ, covMZ, objMZ, fiML, name="MZ" ) # Function to create MxModel objects 
modelDZ 	<- mxModel( pars, dataDZ, covDZ, objDZ, fiML, name="DZ" ) # Function to create MxModel objects 
obj  		  <- mxFitFunctionMultigroup(c("MZ","DZ"))				  # Function used to fit a multiple group model
dcs_model	<- mxModel( "dcs_model", pars, modelMZ, modelDZ, obj )
summary(dcs_fit <- mxTryHard(dcs_model,  extraTries=15, intervals = T, greenOK=FALSE, checkHess=TRUE, fit2beat=Inf, set.seed(2016)))

# Number of p	arameters
 # ϕ phi 		  = 18
 # Γ gamma 		= fixed
 # Ψ psi 		  = 12
 # Β beta 		= 1
 # I identity	= fixed
 # Λ lamba 		= fixed 
 # ϵ epsilon	= 3
 # μ means 		= 3
 # Total		  = 37

# Full (A+C+E) dual-change score model
# NB: mxTryHard() uses (pseudo-)random-number generation ∴ set seed


	round(dcs_fit$output$estimate,4)
	round(dcs_fit$algebras$VC$result[,16:30],2) # Standardized variance componnets at each time [A/(A+C+E)] etc 
	dcs_fit$algebras$corrA						# Correlations between genetic growth parameters
	dcs_fit$algebras$corrC						# Correlations between 'C' growth parameters
	dcs_fit$algebras$corrE						# Correlations between 'E' growth parameters
	summary(dcs_fit)
	summary(dcs_fit,verbose=T)					# Model parameters contingent upon UPPER and LOWER 'b' parameters

# AE Dual Change Score. Remove C phi (growth variance/covariance), remove C psi (auto-regression variance/covariance)
 AE_DCS     <- mxModel( dcs_fit, name="AE_DCS" )
 AE_DCS     <- omxSetParameters( AE_DCS, label=c("c11","c21","c31","c22","c32","c33"), free=F, values=0)	# Remove 'C' growth parameters	
 AE_DCS     <- omxSetParameters( AE_DCS, label=c("ic11","ic22","ic33","ic44","ic55"), free=F, values=0)		# Remove 'C' residuals (auto-regression)
 AE_DCS     <- omxSetParameters( AE_DCS, label=c("res_c"), free=F, values=0)								# Remove 'C' errors
 summary(AE_DCS_fit <- mxTryHard(AE_DCS,  extraTries=15,  greenOK=FALSE, checkHess=TRUE, fit2beat=Inf, set.seed(2016)))

# AE Dual Change Score
# A phi (growth variance/covariance) + E psi (auto-regression variance/covariance)
 Aϕ_EΨ  <- mxModel( dcs_fit, name="Aϕ_EΨ" )
 Aϕ_EΨ  <- omxSetParameters( Aϕ_EΨ, label=c("c11","c21","c31","c22","c32","c33"), free=F, values=0) # Remove C latent growth 
 Aϕ_EΨ  <- omxSetParameters( Aϕ_EΨ, label=c("ic11","ic22","ic33","ic44","ic55"), free=F, values=0)  # Remove C innovations
 Aϕ_EΨ  <- omxSetParameters( Aϕ_EΨ, label=c("res_c"), free=F, values=0) 							# Remove C residula error
 Aϕ_EΨ  <- omxSetParameters( Aϕ_EΨ, label=c("ia11","ia22","ia33","ia44","ia55"), free=F, values=0)  # Remove A innovations
 Aϕ_EΨ  <- omxSetParameters( Aϕ_EΨ, label=c("e11","e21","e31","e22","e32","e33"), free=F, values=0) # Remove E latent growth
 Aϕ_EΨ  <- omxSetParameters( Aϕ_EΨ, label=c("ie11"), free=T, values=.1) 							# Add first E innovation
 summary( Aϕ_EΨ_fit <- mxTryHard( Aϕ_EΨ,  extraTries=15,  greenOK=FALSE, checkHess=TRUE, fit2beat=Inf, set.seed(2016)))

# Compare models
 subs <- c( AE_DCS_fit,Aϕ_EΨ_fit )
 comparisons<-mxCompare( dcs_fit, subs );comparisons