#setwd("")

require(OpenMx)
require(MASS)
mxOption(NULL, 'Default optimizer', 'NPSOL')
source("powerFun.R")


# 4 Steps for Calculating Power in Twin Studies (a priori)

## 1) Simulate data for the MZ and DZ twins. 
## The first step is to simulate twin data that corresponds with your expectations about the results. These expectations 
## should be based, as far possible, on the literature. Of course, if you are doing something that has never been done 
## before, these expectations can be less clear. It is also important to consider the ratio of MZ to DZ twins, as the 
## power to detect significant genetic or environmental variance is influenced by this ratio. Specifically, you will 
## need to provide the proportions of variance for the standardized A, C and E variance components. You will not have to 
## simulate the data yourself (the functions will automatically take care of this step), but you will have to provide 
## the values for the ACE parameters.
## 
## 2) Fit the model and relevant submodels. 
## The second step is simply to fit the model to the simulated data. Again, the function will do this automatically and 
## return a set of fitted parameters. It is important to make sure that the fitted parameters correspond to the values 
## that you specified in the previous stage. In some cases the simulated values may not correspond with the fitted 
## parameters. In these cases, a warning message will appear. If this happens, it is possible to increase the sample 
## size (keeping the ratio of the MZ to DZ twins constant). While fitting the full ACE model, it is trivial to also 
## fit a reduced model where the parameter of interest is fixed to zero. Fitting both the full and the reduced model 
## provides the difference between the -2 LogLikelihoods and the and obtain the χ2 value for the models, which is 
## the primary purpose of this step.
## 
## 
## 3) Obtain the weighted ncp (Wncp) 
## Third, to obtain the Wncp, we divide the χ2 value by the total sample size (NMZ + NDZ). By dividing the χ2 value 
## by the sample size, we are calculating the average contribution of each family to χ2 value. Therefore, this value 
## is dubbed the weighted ncp (Wncp).
## 
## 
## 4) Calculate power. 
## Power is calculated using the Wncp value obtained in the previous step. The essential component to discussing power 
## from the perspective of the ncp is that the ncp increases linearly with sample size. For example, if 
## the value of the χ2 value for the LRT between an ACE model and an AE model is 10 with 500 MZ and 500 DZ twin 
## pairs (N = 1000), each family will contribute 10/1000 = .01 to the χ2 value, on average. It is then possible to 
## extrapolate that with 2000 families, the χ2 value would be 20, and with 500 families the χ2 value would be 5. 
## Therefore, to apply this to an arbitrary set of sample sizes, we can multiply the Wncp obtained in the previous 
## step by a vector of sample sizes to obtain a vector of ncp's. This vector of ncp's is then used to calculate the 
## power for each sample size. 
## 
## Specifically, power is computed using the following code in R:
## 1- pchisq(qchisq(1- pcrit, 1), 1, ncp)


powerTest <- acePow(add = .33, com = .33, Nmz = 1000, Ndz = 1000)
powerTest


#  Major Question: What is the power to detect Va and Vc in the classical twin design?
#  Minor Question: What factors affect this power?

# What is the Power of A the Univariate Model as C increases?
# Note: the following 3 lines of code are calculating the power for a specific set of variables

modA1 <- acePow(add = .33, com = .1, Nmz = 1000, Ndz = 1000)
modA2 <- acePow(add = .33, com = .3, Nmz = 1000, Ndz = 1000)
modA3 <- acePow(add = .33, com = .5, Nmz = 1000, Ndz = 1000)

# Note: The following lines of code are used to make plots. The plot used below are writen as helper 
#       functions and can be edited (in R) using standard plotting commands. These will be generally
#       repeated for the other graphs in this tutorial.

#par(mfrow = c(1,2), mar = c(4, 5, 2, 1) + 0.1)
powerPlot(maxN = 1500, Wncp = c(modA1$WncpA, modA2$WncpA, modA3$WncpA))
legText <- c("A = .33 & C = .1", "A = .33 & C = .3", "A = .33 & C = .5")
legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
mtext("Additive Genetic Variance", cex = 1.5)

# What is the Power of C the Univariate Model as A increases?
modC1 <- acePow(add = .1, com = .33, Nmz = 1000, Ndz = 1000)
modC2 <- acePow(add = .3, com = .33, Nmz = 1000, Ndz = 1000)
modC3 <- acePow(add = .5, com = .33, Nmz = 1000, Ndz = 1000)

#par( mar = c(4, 5, 2, 1) + 0.1)
powerPlot(maxN = 1500, Wncp = c(modC1$WncpC, modC2$WncpC, modC3$WncpC))
legText <- c("A = .1 & C = .33", "A = .3 & C = .33", "A = .5 & C = .33")
legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
mtext("Common Environmental Variance", cex = 1.5)


#  Major Question: How does the proportion of MZ to DZ twins affect the power to detect Va and Vc?

# What is the Power of A and C in the Univariate Model if the MZ and DZ groups are of different sizes?
modN1 <- acePow(add = .33, com = .33, Nmz = 5000, Ndz = 1000)
modN2 <- acePow(add = .33, com = .33, Nmz = 3000, Ndz = 3000)
modN3 <- acePow(add = .33, com = .33, Nmz = 1000, Ndz = 5000)

#par(mfrow = c(1,2),  mar = c(4, 5, 2, 1) + 0.1)
powerPlot(maxN = 1500, Wncp = c(modN1$WncpA, modN2$WncpA, modN3$WncpA))
legText <- c("MZ:DZ = 5:1", "MZ:DZ = 1:1", "MZ:DZ = 1:5")
legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
mtext("Additive Genetic Variance", cex = 1.5)

powerPlot(maxN = 1500, Wncp = c(modN1$WncpC, modN2$WncpC, modN3$WncpC))
mtext("Common Environmental Variance", cex = 1.5)
legend("bottomright", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)


#  Major Question: How does the prevalance of a binary trait affect power?

# What is the Power of A the Univariate Model for Binomial Data (relative to a continuous variable)
con  <- acePow( add  = .33, com  = .33, Nmz  = 1000, Ndz  = 1000)
bin5 <- acePowOrd( add  = .33, com  = .33,  percent = c(.5, .5), Nmz  = 1000, Ndz  = 1000)
bin4 <- acePowOrd( add  = .33, com  = .33,  percent = c(.4, .6), Nmz  = 1000, Ndz  = 1000)
bin3 <- acePowOrd( add  = .33, com  = .33,  percent = c(.3, .7), Nmz  = 1000, Ndz  = 1000)
bin2 <- acePowOrd( add  = .33, com  = .33,  percent = c(.2, .8), Nmz  = 1000, Ndz  = 1000)
bin1 <- acePowOrd( add  = .33, com  = .33,  percent = c(.9, .1), Nmz  = 1000, Ndz  = 1000)
bin05 <- acePowOrd( add  = .33, com  = .33,  percent = c(.05, .95), Nmz  = 1000, Ndz  = 1000)
 
powerPlot(maxN = 5000, Wncp = c(con$WncpA, bin5$WncpA, bin4$WncpA, bin3$WncpA, bin2$WncpA, bin1$WncpA, bin05$WncpA), pcrit = .05)
legText <- c("Continuous", "Binary (Prevalence = .5)","Binary (Prevalence = .4)","Binary (Prevalence = .3)",
             "Binary (Prevalence = .2)", "Binary (Prevalence = .1)", "Binary (Prevalence = .05)")
legend("bottomright", legend = legText, col = 1:7, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
 
 
# Major Question: What is the Power to detect correlations bewteen the variance components? 
 
# What is the Power to detect Rg in the Bivariate Model?
rg1 <- bivPow(A1 = .3, A2 = .3, rg = .10, C1 = .33, C2 = .33, rc = .1, Nmz = 1000, Ndz = 1000)
rg2 <- bivPow(A1 = .3, A2 = .3, rg = .30, C1 = .33, C2 = .33, rc = .3, Nmz = 1000, Ndz = 1000)
rg3 <- bivPow(A1 = .3, A2 = .3, rg = .50, C1 = .33, C2 = .33, rc = .5, Nmz = 1000, Ndz = 1000)

rg4 <- bivPow(A1 = .4, A2 = .4, rg = .10, C1 = .33, C2 = .33, rc = .1, Nmz = 1000, Ndz = 1000)
rg5 <- bivPow(A1 = .4, A2 = .4, rg = .30, C1 = .33, C2 = .33, rc = .3, Nmz = 1000, Ndz = 1000)
rg6 <- bivPow(A1 = .4, A2 = .4, rg = .50, C1 = .33, C2 = .33, rc = .5, Nmz = 1000, Ndz = 1000)

rg7 <- bivPow(A1 = .5, A2 = .5, rg = .10, C1 = .33, C2 = .33, rc = .1, Nmz = 1000, Ndz = 1000)
rg8 <- bivPow(A1 = .5, A2 = .5, rg = .30, C1 = .33, C2 = .33, rc = .3, Nmz = 1000, Ndz = 1000)
rg9 <- bivPow(A1 = .5, A2 = .5, rg = .50, C1 = .33, C2 = .33, rc = .5, Nmz = 1000, Ndz = 1000)

#par(mfrow = c(3,2),  mar = c(4, 5, 2, 1) + 0.1)
#powerPlot(maxN = 750, Wncp = c(rg1$WncpA1, rg2$WncpA1, rg3$WncpA1))
#legText <- c("a = .3 & rg = .1", "a = .3 & rg = .3", "a = .3 & rg = .5")
#legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
#mtext("Additive Genetic Variance", cex = 1)
legText <- c( "rg = .1", "rg = .3", "rg = .5")

powerPlot(maxN = 2000, Wncp = c(rg1$WncpRg, rg2$WncpRg, rg3$WncpRg))
mtext("Genetic Correlation (A=.3, C=.33)", cex = 1)
legend("topleft", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)

#powerPlot(maxN = 750, Wncp = c(rg4$WncpA1, rg5$WncpA1, rg6$WncpA1))
#legText <- c("a = .4 & rg = .1", "a = .4 & rg = .3", "a = .4 & rg = .5")
#legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
#mtext("Additive Genetic Variance", cex = 1)

powerPlot(maxN = 2000, Wncp = c(rg4$WncpRg, rg5$WncpRg, rg6$WncpRg))
mtext("Genetic Correlation (A=.4, C=.33)", cex = 1)
legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)

#powerPlot(maxN = 750, Wncp = c(rg7$WncpA1, rg8$WncpA1, rg9$WncpA1))
#legend("right", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
#mtext("Additive Genetic Variance", cex = 1)

powerPlot(maxN = 2000, Wncp = c(rg7$WncpRg, rg8$WncpRg, rg9$WncpRg))
mtext("Genetic Correlation (A=.5, C=.33)", cex = 1)
legend("bottomright", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)


# What is the Power to detect Qualitative (different genes) and Quantitative (same genes but to different extents) Sex Limitation?

# Qualitative Sex Limitation
qual9 <- sexLimPower(am = .5, cm = .1, af = .5, cf = .1, rg = .9  , Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 10000)
qual7 <- sexLimPower(am = .5, cm = .1, af = .5, cf = .1, rg = .7  , Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 10000)
qual5 <- sexLimPower(am = .5, cm = .1, af = .5, cf = .1, rg = .5  , Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 10000)
qual3 <- sexLimPower(am = .5, cm = .1, af = .5, cf = .1, rg = .3  , Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 10000)
qual1 <- sexLimPower(am = .5, cm = .1, af = .5, cf = .1, rg = .1  , Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 10000)

# Quantitative Sex Limitation
quant1 <- sexLimPower(am = .2, cm = .2, af = .5, cf = .2, rg = 1, Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 5000)
quant2 <- sexLimPower(am = .3, cm = .2, af = .5, cf = .2, rg = 1, Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 5000)
quant3 <- sexLimPower(am = .4, cm = .2, af = .5, cf = .2, rg = 1, Nmzm = 5000, Nmzf = 5000, Ndzm = 5000, Ndzf = 5000, Ndzo = 5000)


#par(mfrow = c(1,2),  mar = c(4, 5, 2, 1) + 0.1)

powerPlot(maxN = 15000, Wncp = c(qual9$WncpRg, qual7$WncpRg, qual5$WncpRg, qual3$WncpRg, qual1$WncpRg))
legText <- c("rg = .9", "rg = .7", "rg = .5", "rg = .3", "rg = .1")
legend("bottomright", legend = legText, col = 1:5, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
mtext("Qualitative Sex Limitation", cex = 1)


powerPlot(maxN = 5000, Wncp = c(quant1$WncpEqualA, quant2$WncpEqualA, quant3$WncpEqualA))
legText <- c("Am = .2 & Af = .5", "Am = .3 & Af = .5", "Am = .4 & Af = .5")
legend("bottomright", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
mtext("Quantitative Sex Limitation", cex = 1)


# What is the Power to detect Additive Genetic Variance in a Common Pathway Twin Model?

# Simulate data for the Common Pathway Model 
ComPathDat <- comPathSim(aL=.2, cL=.2, eL=.6, load = c(.9,.8,.7,.6,.5), 
                         specA = c(.2, .2, .2, .2,.2), specC = c(.2, .2, .2, .2,.2), specE = c(.2, .2, .2, .2,.2), 
						 Nmz = 2000, Ndz = 2000)

# Put the data into MZ and DZ objects
mzData <- ComPathDat$mzData	
dzData <- ComPathDat$dzData	
	
# Set up objects for the analysis
selVars <- colnames(mzData)            
nf  <- 1       # number of factors
ntv <-  dim(mzData)[2]
nv <- ntv/2
nl        <- 1
vars <- paste("V", 1:nv, sep = "")

# Create Mean Matrices
meanG     <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=0, labels=paste("mean",vars,sep="_"), name="meanG" )

# Matrices ac, cc, and ec to store a, c, and e path coefficients for latent phenotype(s)
pathAl    <- mxMatrix( type="Lower", nrow=nl, ncol=nl, free=TRUE, values=.6, labels="aPath", lbound=.00001, name="al" )
pathCl    <- mxMatrix( type="Lower", nrow=nl, ncol=nl, free=TRUE, values=.6, labels="cPath", lbound=.00001, name="cl" )
pathEl    <- mxMatrix( type="Lower", nrow=nl, ncol=nl, free=TRUE, values=.6, labels="ePath", lbound=.00001, name="el" )

# Matrix and Algebra for constraint on variance of latent phenotype
covarLP   <- mxAlgebra( expression=al %*% t(al) + cl %*% t(cl) + el %*% t(el), name="CovarLP" )
varLP     <- mxAlgebra( expression= diag2vec(CovarLP), name="VarLP" )
unit      <- mxMatrix( type="Unit", nrow=nl, ncol=1, name="Unit")
varLP1    <- mxConstraint( expression=VarLP == Unit, name="varLP1")

# Matrix f for factor loadings on latent phenotype
pathFl    <- mxMatrix( type="Full", nrow=nv, ncol=nl, free=TRUE, values=.2, labels=paste("load",1:nv,sep = ""), name="fl" )

# Matrices as, cs, and es to store a, c, and e path coefficients for specific factors
pathAs    <- mxMatrix( type="Diag", nrow=nv, ncol=nv, free=TRUE, values=.4, labels=paste("as",1:nv, sep = ""), lbound=.00001, name="as" )
pathCs    <- mxMatrix( type="Diag", nrow=nv, ncol=nv, free=TRUE, values=.4, labels=paste("cs",1:nv, sep = ""), lbound=.00001, name="cs" )
pathEs    <- mxMatrix( type="Diag", nrow=nv, ncol=nv, free=TRUE, values=.5, labels=paste("es",1:nv, sep = ""), lbound=.00001, name="es" )

# Matrices A, C, and E compute variance components
covA      <- mxAlgebra( expression=fl %&% (al %*% t(al)) + as %*% t(as), name="A" )
covC      <- mxAlgebra( expression=fl %&% (cl %*% t(cl)) + cs %*% t(cs), name="C" )
covE      <- mxAlgebra( expression=fl %&% (el %*% t(el)) + es %*% t(es), name="E" )

# Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins
covP      <- mxAlgebra( expression= A+C+E, name="V" )
covMZ     <- mxAlgebra( expression= A+C, name="cMZ" )
covDZ     <- mxAlgebra( expression= 0.5%x%A+ C, name="cDZ" )
expCovMZ  <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" )
expCovDZ  <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" )

# Create Data Objects for Multiple Groups
dataMZ    <- mxData( observed=mzData, type="raw" )
dataDZ    <- mxData( observed=dzData, type="raw" )

# Create Expectation Objects for Multiple Groups
expMZ     <- mxExpectationNormal( covariance="expCovMZ", means="meanG", dimnames=selVars )
expDZ     <- mxExpectationNormal( covariance="expCovDZ", means="meanG", dimnames=selVars )
funML     <- mxFitFunctionML()

# Create Model Objects for Multiple Groups
pars      <- list(meanG, pathAl, pathCl, pathEl, covarLP, varLP, unit, pathFl, pathAs, pathCs, pathEs, covA, covC, covE, covP)
modelMZ   <- mxModel( name="MZ", pars, covMZ, expCovMZ, dataMZ, expMZ, funML )
modelDZ   <- mxModel( name="DZ", pars, covDZ, expCovDZ, dataDZ, expDZ, funML )
multi     <- mxFitFunctionMultigroup( c("MZ","DZ") )

# Build & Run the Full Model 
modelCP   <- mxModel( "ComPath", pars, varLP1, modelMZ, modelDZ, multi )
fullCP     <- mxRun(modelCP)
summary(fullCP)

# Build & Run the Reduced Model 
redCP <- omxSetParameters(fullCP, labels = "aPath", value = 0, free = FALSE, name = "reduced")
redCP     <- mxRun(redCP)
summary(redCP)

# Calculate the Weighted NCP
neg2LL <- redCP$output$fit - fullCP$output$fit
totN   <- dim(mzData)[1]+dim(dzData)[1]
Wncp <- neg2LL / totN
Wncp

powerPlot(maxN = 2000, Wncp = c( 0.003413346, 0.00928087, 0.03469791)) 
legText <- c("A = .2, C = .2, & E = .6", "A = .3, C = .2, & E = .5", "A = .4, C = .2, & E = .4")
legend("bottomright", legend = legText, col = 1:3, ncol = 1, cex = 1, lwd = 3, box.lwd = 0)
mtext("Power to detect Additive Genetic \n Variance in a Latent Factor", cex = 1.5, line = 1)