# -----------------------------------------------------------------------
# Program: UnivariateTwinAnalysis_MatrixRawCon.R  
#  Author: Hermine Maes & Matt Keller
#    Date: March 3, 2010 
#
# Univariate Twin Analysis model to estimate causes of variation (ADCE)
# for continuous data
# Matrix style model input - Raw data input
#
# Revision History
#   Matt Keller -- March 3, 2010, added D to an ACE script to demonstrate
#   identification issues
#   Matt Keller - March 8, 2012, minor updates to reflect changes in new
#   OpenMx version.
# -----------------------------------------------------------------------


# -----------------------------------------------------------------------
# START PRACTICAL 1 (you don't need to change anything for this practical)
# -----------------------------------------------------------------------


# Starting things up
# -----------------------------------------------------------------------
# Where is the working directory? 
getwd()  #If it's not where you want it to be, change it (Under File)

# Get the two packages we'll use
require(OpenMx)

# Get some additional functions written by Hermine & Tim
# **NOTE**: you can read in an R source file from the web! Cool!
source("http://www.vipbg.vcu.edu/~vipbg/Tc24/GenEpiHelperFunctions.R")


#Get our dataset from the web.
#This is a cool R trick: your file might exist on your local machine
#OR it might exist on the web!
con <- "http://www.matthewckeller.com/public/ntf.data"
ntf.data <- read.table(con,header=TRUE)
# -----------------------------------------------------------------------


# Prepare Data
# -----------------------------------------------------------------------
head(ntf.data)
str(ntf.data)

#get summary of each column in ntf.data
summary(ntf.data)  #YIKES - look at those big -999's in there - those are NAs

#change -999 to NA
ntf.data[ntf.data==-999] <- NA

#make sure changing the NA's worked:
summary(ntf.data)

#define variables
nv <- 1       #number of variables
ntv <- nv*2
mzData <- ntf.data[ntf.data$twin.type=='MZ',3:4]
dzData <- ntf.data[ntf.data$twin.type=='DZ',3:4]
dim(mzData)
dim(dzData)
# -----------------------------------------------------------------------


# Print Descriptive Statistics
# -----------------------------------------------------------------------
cov(mzData,use="complete")  #MZ Var/Covar matrix
cov(dzData,use="complete")  #DZ Var/Covar matrix

#create object of MZ vs DZ cov's
(cov.MZ <- cov(mzData,use="complete") [1,2])
(cov.DZ <- cov(dzData,use="complete") [1,2])
# -----------------------------------------------------------------------


# Get ****RANDOM**** start values
# -----------------------------------------------------------------------
(a.start <- runif(1,-1,1))
(d.start <- runif(1,-1,1))
(c.start <- runif(1,-1,1))
#Look at your own personal start values
#Ask your neighbor what they got!
# -----------------------------------------------------------------------


# Fit ADCE Model with RawData and Matrices Input
# -----------------------------------------------------------------------
univADCEModel <- mxModel("univADCE",
    mxModel("ADCE",
    # Matrices a, c, and e to store a, c, and e path coefficients
    # ***NOTE*** IF a, d, and c are all free=TRUE, your model will not be
    # identified! We WANT our model to not be identified here
        mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE,
                 values=a.start, label="a11", name="a" ), 
        mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE,
                 values=d.start, label="d11", name="d" ), 
        mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE,
                 values=c.start, label="c11", name="c" ),
        mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE,
                 values=0, label="e11", name="e" ),
    # Matrices A, C, and E compute variance components
        mxAlgebra( expression=a %*% t(a), name="A" ),
        mxAlgebra( expression=d %*% t(d), name="D" ), 
        mxAlgebra( expression=c %*% t(c), name="C" ),
        mxAlgebra( expression=e %*% t(e), name="E" ),
    # Algebra to compute ***total variance***
        mxAlgebra( expression=A+D+C+E, name="TotVar" ),
    # Matrix & Algebra for expected ***means*** vector
        mxMatrix( type="Full", nrow=1, ncol=nv, free=TRUE,
                 values= 20, label="mean", name="Mean" ),
        mxAlgebra( expression= cbind(Mean,Mean), name="expMean"),
    # Algebra for expected variance/covariance matrix in ***MZ***
        mxAlgebra( expression= rbind(
            cbind(A+D+C+E , A+D+C),
            cbind(A+D+C   , A+D+C+E)),name="expCovMZ" ),
    # Algebra for expected variance/covariance matrix in ***DZ***
        mxAlgebra( expression= rbind(
            cbind(A+D+C+E     , 0.5%x%A+0.25%x%D+C),
            cbind(0.5%x%A+0.25%x%D+C , A+D+C+E)),  name="expCovDZ" ) 
    ),
    mxModel("MZ",
        mxData( observed=mzData, type="raw" ),
        mxFIMLObjective( covariance="ADCE.expCovMZ",
                        means="ADCE.expMean", dimnames=c('tw1','tw2')  )
    ),
    mxModel("DZ", 
        mxData( observed=dzData, type="raw" ),
        mxFIMLObjective( covariance="ADCE.expCovDZ",
                        means="ADCE.expMean", dimnames=c('tw1','tw2') ) 
    ),
    mxAlgebra( expression=MZ.objective + DZ.objective, name="Neg2sumll" ),
    mxAlgebraObjective("Neg2sumll")
)

univADCEFit <- mxRun(univADCEModel)
summary(univADCEFit)

#Estimate of VA - ask your neighbors what they got!
univADCEFit@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univADCEFit@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univADCEFit@submodels$ADCE@matrices$c@values^2

# -----------------------------------------------------------------------


# -----------------------------------------------------------------------
# END PRACTICAL 1
# -----------------------------------------------------------------------


# -----------------------------------------------------------------------
# START PRACTICAL 2  - YOU need to change the ???'s to make this script work
# -----------------------------------------------------------------------


# Fit ***ADE*** model by dropping C
# -----------------------------------------------------------------------
univADEModel <- mxModel(univADCEFit, name="univADE",
    mxModel(univADCEFit$ADCE,
        mxMatrix( type="Lower", nrow=1, ncol=1, free=???, values=???,
                 label="c11", name="c" ) # drop c at 0
    )
)

univADEFit <- mxRun(univADEModel)
summary(univADEFit)  #What is your -2LL

#Estimate of VA - ask your neighbors what they got!
univADEFit@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univADEFit@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univADEFit@submodels$ADCE@matrices$c@values^2
# -----------------------------------------------------------------------


# Fit ***ACE*** model
# ***NOTE: we normally wouldn't run an ACE model bc CovMz > 2CovDz
# Can anyone notice a difference in the -2LL?
# -----------------------------------------------------------------------
univACEModel <- mxModel(univADCEFit, name="univACE",
    mxModel(univADCEFit$ADCE,
        mxMatrix( type="Lower", nrow=1, ncol=1, free=???, values=???,
                 label="???", name="???" ) # drop d at 0
    )
)

univACEFit <- mxRun(univACEModel)
summary(univACEFit)  #What is your -2LL?


#Estimate of VA - ask your neighbors what they got!
univACEFit@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univACEFit@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univACEFit@submodels$ADCE@matrices$c@values^2
# -----------------------------------------------------------------------


# Fit ***DCE*** model
# ***NOTE: We are NOT recommeding this model. A phenotype with only D and
# no A is VERY unlikely (for reasons Dr. Eaves discussed).
# Rather, the point is simply to but show that it is mathematically possible 
# in order to demonstrate the concept of parameter indeterminacy
# -----------------------------------------------------------------------
univDCEModel <- ???

univDCEFit <- mxRun(univDCEModel)
summary(univDCEFit) #What is your -2LL

#Estimate of VA - ask your neighbors what they got!
univDCEFit@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univDCEFit@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univDCEFit@submodels$ADCE@matrices$c@values^2
# -----------------------------------------------------------------------


#compare the fits of the four models run
tableFitStatistics(univADCEFit,list(univADEFit,univACEFit,univDCEFit))

# -----------------------------------------------------------------------
# END PRACTICAL 2
# -----------------------------------------------------------------------


# -----------------------------------------------------------------------
# START PRACTICAL 3 - YOU need to change the ???'s to make this script work
# -----------------------------------------------------------------------

# What would have happened if, in the real world, C = .05, .10, or .15 rather
# than being its assumed value of 0??


# Fit ADE model where C is assumed to be ***.05***
# NOTE: YOU need to figure out what to change!
# HINT: the path is the sqrt(x) of the variance. So if you want to simulate
# variance of .05, the path should be sqrt(.05).
# -----------------------------------------------------------------------
univADEModel.05 <- mxModel(univADCEFit, name="univADE",
    mxModel(univADCEFit$ADCE,
        mxMatrix( type="Lower", nrow=1, ncol=1, free=FALSE, values=sqrt(???),
                 label="c11", name="c" ) # drop c^2 at .05
    )
)

univADEFit.05 <- mxRun(univADEModel.05)
summary(univADEFit.05)


#Estimate of VA - ask your neighbors what they got!
univADEFit.05@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univADEFit.05@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univADEFit.05@submodels$ADCE@matrices$c@values^2
# -----------------------------------------------------------------------


# Fit ADE model where C is assumed to be ***.10***
# NOTE: YOU need to figure out what to change!
# -----------------------------------------------------------------------
univADEModel.10 <- mxModel(univADCEFit, name="univADE",
    mxModel(univADCEFit$ADCE,
        mxMatrix( type="Lower", nrow=1, ncol=1, free=FALSE, values=???,
                 label="c11", name="c" ) # drop c^2 at .10
    )
)

univADEFit.10 <- mxRun(univADEModel.10)
summary(univADEFit.10)


#Estimate of VA - ask your neighbors what they got!
univADEFit.10@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univADEFit.10@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univADEFit.10@submodels$ADCE@matrices$c@values^2
# -----------------------------------------------------------------------


# Fit ADE model where C is assumed to be ***.15***
# NOTE: YOU need to figure out what to change!
# -----------------------------------------------------------------------
univADEModel.15 <- mxModel(univADCEFit, name="univADE",
    mxModel(univADCEFit$ADCE,
        mxMatrix( type="Lower", nrow=1, ncol=1, free=FALSE, values=???,
                 label="c11", name="c" ) # drop c^2 at .15
    )
)

univADEFit.15 <- mxRun(univADEModel.15)
summary(univADEFit.15)


#Estimate of VA - ask your neighbors what they got!
univADEFit.15@submodels$ADCE@matrices$a@values^2

#Estimate of VD - ask your neighbors what they got!
univADEFit.15@submodels$ADCE@matrices$d@values^2

#Estimate of VC - ask your neighbors what they got!
univADEFit.15@submodels$ADCE@matrices$c@values^2
# -----------------------------------------------------------------------


# Compare the results of ***A-hat*** across assumed values of C:
univADEFit@submodels$ADCE$a@values^2
univADEFit.05@submodels$ADCE$a@values^2  #A is what when C=.05?
univADEFit.10@submodels$ADCE$a@values^2  #A is what when C=.1?
univADEFit.15@submodels$ADCE$a@values^2  #A is what when C=.15?

# Compare the results of ***D-hat*** across assumed values of C:
univADEFit@submodels$ADCE$d@values^2
univADEFit.05@submodels$ADCE$d@values^2  #D is what when C=.05?
univADEFit.10@submodels$ADCE$d@values^2  #D is what when C=.1?
univADEFit.15@submodels$ADCE$d@values^2  #D is what when C=.15?

# Compare the -2LLs of all these models
univADEFit@algebras$'Neg2sumll'@result
univADEFit.05@algebras$'Neg2sumll'@result
univADEFit.10@algebras$'Neg2sumll'@result
univADEFit.15@algebras$'Neg2sumll'@result


# -----------------------------------------------------------------------
# END PRACTICAL 3
# -----------------------------------------------------------------------


# -----------------------------------------------------------------------
# START PRACTICAL 4 (you don't need to do anything to make this work)
# -----------------------------------------------------------------------


#Create the data
mz.ntf <- ntf.data[ntf.data$twin.type=='MZ',3:6]
dz.ntf <- ntf.data[ntf.data$twin.type=='DZ',3:6]


#Fit NTF Model with **Sibling Env** (instead of "C" - both ~ the same)
#NOTE: This is the "piecewise" way of writing MX models
#-----------------------------------------------------------------------
# Paths going to latent variables
a <- mxMatrix(type="Lower",nrow=nv,ncol=nv,free=T,values=.6,
              label="AddGenPath",name="a")
d <- mxMatrix(type="Lower",nrow=nv,ncol=nv,free=T,values=.6,
              label="DomPath",name="d")
s <- mxMatrix(type="Lower", nrow=nv, ncol=nv, free=T,  values=.4,
              label="SibPath", name="s")
e <- mxMatrix(type="Lower",nrow=nv,ncol=nv,free=T,values=.6,
              label="EnvPath",name="e")
mu <- mxMatrix(type="Lower",nrow=nv,ncol=nv,free=T,values=.0,
               label="AMCopath",name="mu")


# Matrices for latent variances
Vp1 <- mxMatrix(type="Full",nrow=nv,ncol=nv,free=T,values=1.5,
                label="VarPhen",name="Vp1")
delta1 <- mxMatrix(type="Full",nrow=nv,ncol=nv,free=T,values=.5,
                   label="CovPhenGen",name="delta1")
q1 <- mxMatrix(type="Full",nrow=nv,ncol=nv,free=T,values=1,
               label="LatentVarAddGen",name="q1")

# mxAlgebra - nonlinear constraints
Vp2 <- mxAlgebra(a %&% q1 + e %*% t(e) + d %*% t(d) + s %*% t(s),name="Vp2")
delta2 <- mxAlgebra(q1 %*% a,name="delta2")
q2 <- mxAlgebra(1 + delta1 %&% mu,name="q2")

# Equating nonlinear constraints and parameters
VpCon <- mxConstraint(Vp1==Vp2,name='VpCon')
deltaCon <- mxConstraint(delta1==delta2,name='deltaCon')            
qCon <- mxConstraint(q1==q2,name='qCon')           

# mxAlgebra - relative covariances
CvMz <- mxAlgebra(a %&% q1 + d %*% t(d) + s %*% t(s),name="CvMz")
CvDz <- mxAlgebra(a %&% (q1-.5) + .25 %x% d %*% t(d) + s %*% t(s),name="CvDz")
ParChild <- mxAlgebra(.5 %x% a %*% delta1 + .5 %x% a %*% delta1 %*% mu %*% Vp1,
                      name="ParChild")
CvSps <- mxAlgebra(Vp1 %&% mu,name="CvSps")


# Put all these pathways, variances, and constraints together
ntf.pths <- mxModel(model="NTFpaths",a,d,e,mu,s,Vp1,delta1,q1,Vp2,delta2,q2,VpCon,deltaCon,qCon,CvMz,CvDz,ParChild,CvSps)


# make the expected mean and covariance matrices for MZ data
# and pull the raw MZ data in
mzModel <- mxModel(model=ntf.pths,name = "MZNTF",
    mxMatrix(type="Full", nrow=1, ncol=4*nv, free=TRUE,  values= .05, label="mean",
             dimnames=list(NULL, colnames(mz.ntf)), name="expMeanMz"), 
    # Algebra for expected variance/covariance matrix in MZ
    mxAlgebra(rbind(
        cbind(Vp1,       CvMz,      ParChild,   ParChild),
        cbind(CvMz,      Vp1,       ParChild,   ParChild),
        cbind(ParChild,  ParChild,  Vp1,        CvSps),
        cbind(ParChild,  ParChild,  CvSps,      Vp1)),
              dimnames=list(colnames(mz.ntf),colnames(mz.ntf)),name="expCovMz"),
    mxData(observed=mz.ntf, type="raw"), 
    mxFIMLObjective(covariance="expCovMz",means="expMeanMz"))


# make the expected mean and covariance matrices for DZ data
# and pull the raw DZ data in
dzModel <- mxModel(model=ntf.pths,name = "DZNTF",
    mxMatrix(type="Full", nrow=1, ncol=4*nv, free=TRUE,  values= .05, label="mean",
             dimnames=list(NULL, colnames(dz.ntf)), name="expMeanDz"), 
    # Algebra for expected variance/covariance matrix in DZ
    mxAlgebra(rbind(
        cbind(Vp1,       CvDz,      ParChild,   ParChild),
        cbind(CvDz,     Vp1,        ParChild,   ParChild),
        cbind(ParChild, ParChild,   Vp1,        CvSps),
        cbind(ParChild, ParChild,   CvSps,      Vp1)),
              dimnames=list(colnames(dz.ntf),colnames(dz.ntf)),name="expCovDz"),
    mxData(observed=dz.ntf, type="raw"), 
    mxFIMLObjective(covariance="expCovDz",means="expMeanDz"))


#create the mxModel that defines the objective function
ASDE <- mxModel(model="ASDE", mzModel, dzModel,
    #MZNTF.objective is the automatic name for the -2LL of mzModel
    mxAlgebra(MZNTF.objective + DZNTF.objective, name="Neg2LL"),  
    mxAlgebraObjective("Neg2LL"))


#Run MX
ASDE.Fit <- mxRun(ASDE)
summary(ASDE.Fit)  


# estimated A from NTFD (true simulated value was .30)
ASDE.Fit@submodels$MZNTF$a@values^2

# estimated D from NTFD (true simulated value was .30)
ASDE.Fit@submodels$MZNTF$d@values^2

# estimated S from NTFD (true simulated value was .10)
ASDE.Fit@submodels$MZNTF$s@values^2

# estimated E from NTFD
ASDE.Fit@submodels$MZNTF$e@values^2


# -----------------------------------------------------------------------
# END PRACTICAL 4
# -----------------------------------------------------------------------