# Ordinal Data Practical for IBG Boulder Workshop
# Brad Verhulst
# Tuesday March 6th 2018

# Load the Required R Packages
#source('https://openmx.ssri.psu.edu/software/getOpenMx.R')  # Here is the link to install it if you are on a new computer
require(OpenMx)
mxOption(NULL, "Default optimizer", "NPSOL")

############################################################################################################
############################        Start of Functions      ################################################
############################################################################################################


intPlot <- function(rho = 0, Tw1Th1 = -Inf, Tw1Th2=Inf, Tw2Th1=-Inf, Tw2Th2=Inf, mini = -3, maxi = 3){	

	den<-function(T1,T2){         # Function to calcuate the bivariate normal distribution 
		mu1<- 0                   # set the expected value of x1
		mu2<- 0                   # set the expected value of x2
		s11<- 1                   # set the variance of x1
		s22<- 1                   # set the variance of x2
		term1 <- 1/(2*pi*sqrt(s11*s22*(1-rho^2)))
		term2 <- -1/(2*(1-rho^2))
		term3 <- (T1-mu1)^2/s11
		term4 <- (T2-mu2)^2/s22
		term5 <- 2*rho*((T1-mu1)*(T2-mu2))/(sqrt(s11)*sqrt(s22))
		term1*exp(term2*(term3+term4-term5))
	} 
	
	T1<- round(seq(mini,maxi,by =.1),2)     # generate series for values of x1
	T2<- round(seq(mini,maxi,by =.1),2)     # generate series for values of x2

	z<-outer(T1,T2,den)                     # calculate the density values
	dimnames(z) <- list(T1, T2)             # give it dim names

	z[T1<Tw1Th1,] <- 0            # remove below 
	z[,T2<Tw2Th1] <- 0            # remove below 

	z[T1>Tw1Th2,] <- 0            # remove above 
	z[,T2>Tw2Th2] <- 0            # remove above 

	# This function is the ploting function
	persp(T1, T2, z, col = "skyblue1",
	      theta=30, phi=20, r=50, d=0.1, expand=0.5, 
	      ltheta=90, lphi=180, ticktype="detailed", nticks=5, xlab = "Twin 1", ylab = "Twin 2", zlab = "Density")	
}


CellProp <- function(thr,r, N){
prop <-	matrix(	omxAllInt(
					matrix(c(1,r,r,1),2,2), 
					matrix(0,1,2), 
					t(matrix(c(rep(-Inf,2),thr,thr,rep(Inf,2)),2,3))), 2,2)					
round(N* prop)				
}

############################################################################################################
############################         End of Functions       ################################################
############################################################################################################


                                    ################
                                    #### PART 1 ####
                                    ################

# In the first part of the practical we will look at the area under the univariate normal distribution.

# The qnorm() function provides the z statistic for a specified probability

qnorm(.05)

qnorm(.975)


                                    ################
                                  ####  Questions  ####
                                    ################


# What is the z statistic for a probability of .5?

# What is the z statistic for a probability of .99?

# What is the z statistic for a probability of .90?


                                    ################
                                    #### PART 2 ####
                                    ################

# In the second part of the practical we will used the first function to look at the volume 
# under the bivariate normal distribution with various combinations of thresholds and correlations


# All of the arguments to the function have defaults, so the function will produce a graph if you just run the line below
intPlot()


# Looking at various combinations of affected or unaffected
intPlot(rho = 0, Tw1Th1 = -5, Tw1Th2 = 0, Tw2Th1 = -5, Tw2Th2 = 0) # Concordant unaffected
intPlot(rho = 0, Tw1Th1 = -5, Tw1Th2 = 0, Tw2Th1 = 0, Tw2Th2 = 5)  # Discordant: Tw 1 unaffected | Tw 2 affected
intPlot(rho = 0, Tw1Th1 = 0, Tw1Th2 = 5, Tw2Th1 = -5, Tw2Th2 = 0)  # Discordant: Tw 1 unaffected | Tw 2 affected
intPlot(rho = 0, Tw1Th1 = 0, Tw1Th2 = 5, Tw2Th1 = 0, Tw2Th2 = 5)   # Concordant affected


                                    ################
                                  ####  Questions  ####
                                    ################

# What would the graph look like if there was a .5 correlation between twins?

# What would the graph look like if there was a .5 correlation between twins for concordant unaffected twin pairs?

# What would the graph look like if there was a .5 correlation between twins for concordant affected twin pairs?

# What would the graph look like if there was a .5 correlation between twins for discordant twin pairs? (Hint this is two graphs)


                                    ################
                                    #### PART 3 ####
                                    ################

# Okay now we want to know how many people we would expect to have in each cell 

CellProp(thr = 0, r = 0, N = 1000)


                                    ################
                                  ####  Questions  ####
                                    ################


# Question 6: How many a) Concordant Affected, b) Discordant, c) Concordant Unaffected twin pairs would there be if the correlation was .6?

# Question 7: How many a) Concordant Affected, b) Discordant, c) Concordant Unaffected twin pairs would there be if the correlation was .3?


                                    ################
                                    #### PART 4 ####
                                    ################

# Okay now let's put this into a twin model

# First, let's assume that we want to have a model where:
Va <- .33
Vc <- .33
Ve <- .34

# What would we expect the correlations for MZ and DZ twins to be?

rMZ <- Va + Vc
rDZ <- 1/2*Va + Vc

# Second let's assume that 25% of the population has the trait (and 75% does not) (This is in the depression anxiety range).
# What would the thresholds be?

qnorm()
 
# Third let's assume that we will have 500 MZ twins and 500 DZ twins.

# How many a) Concordant Affected, b) Discordant, c) Concordant Unaffected MZ twin pairs would we expect?

CellProp(thr = qnorm(1- .25), r = rMZ, N = 500)

nmzConUnaff <- 322
nmzDiscord  <- 53*2
nmzConAff   <- 72

# How many a) Concordant Affected, b) Discordant, c) Concordant Unaffected DZ twin pairs would we expect?
CellProp(thr = qnorm(1- .25), r = rDZ, N = 500)

ndzConUnaff <- 310
ndzDiscord  <- 65*2
ndzConAff   <- 60


# Ok, let's put this into two data frames

mzData <- as.data.frame(rbind(
                matrix(c(0,0), nmzConUnaff, 2, byrow = T),
                matrix(c(1,0), nmzDiscord/2, 2, byrow = T),
                matrix(c(0,1), nmzDiscord/2, 2, byrow = T),
                matrix(c(1,1), nmzConAff, 2, byrow = T)   ))

dzData <- as.data.frame(rbind(
                matrix(c(0,0), ndzConUnaff, 2, byrow = T),
                matrix(c(1,0), ndzDiscord/2, 2, byrow = T),
                matrix(c(0,1), ndzDiscord/2, 2, byrow = T),
                matrix(c(1,1), ndzConAff, 2, byrow = T)   ))

colnames(mzData)  <- colnames(dzData) <- c("obs_T1", "obs_T2")

table(mzData)
table(dzData)


cor(mzData)   # Do these correlations look correct?
rMZ

cor(dzData)   
rDZ

# Tell openmx that we are working with ordinal data
mzData <- mxFactor(mzData, c(0:1), labels=c(1,2))
dzData <- mxFactor(dzData, c(0:1), labels=c(1,2))

# Put the data into an mxData object
MZdata  <-  mxData(mzData, type = "raw")
DZdata  <-  mxData(dzData, type = "raw")

# Now let's put this into a twin model and see what comes out


nv <- 1                                                           # Specify the number of variables per twin
ntv <- nv*2                                                       # Specify the total number of variables
selVars <- colnames(mzData)                                       # Specify the variable names

A <-  mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=c(.1), label=c("A11"), name="A" )      # The additive genetic variance component 
C <-  mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=c(.1), label=c("C11"), name="C" )      # The common environmental variance component 
E <-  mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=c(.8), label=c("E11"), name="E" )      # The unique environmental variance component

expMean    <-  mxMatrix( type="Full", nrow=1, ncol=ntv, free=F, values= 0, label=c("mean"), name="expMean" )    # The expected means
Thr        <- mxMatrix("Full", 1, 2, free = T, values = 0, labels = "thr", name = "Thr")                        # The expected thresholds

expCovMZ <- mxAlgebra( expression= rbind  ( cbind(A+C+E , A+C), cbind(A+C   , A+C+E)), name="expCovMZ" )                     # The MZ expected covariance matrix
expCovDZ <- mxAlgebra( expression= rbind  ( cbind(A+C+E     , 0.5%x%A+C), cbind(0.5%x%A+C , A+C+E)),  name="expCovDZ" )      # The DZ expected covariance matrix

# Set up the constraint for the variance of the latent liability
vars <- mxAlgebra( A+C+E, name="vars" )                        # calculate the phenotypic variance
con  <- mxConstraint(vars[1,1] ==1, name = "con")              # fix the phenotypic variance to 1


obs <-  list(A,C,E,expMean, Thr, expCovMZ,expCovDZ, vars, con)                                                    # Put all of the objects into a single place
fun <- mxFitFunctionML()                                                                                          # Specify the fit function
mzExp <- mxExpectationNormal(covariance="expCovMZ", means="expMean", thresholds = "Thr", dimnames=selVars )       # Specify the MZ expectation 
dzExp <- mxExpectationNormal(covariance="expCovDZ", means="expMean", thresholds = "Thr", dimnames=selVars )       # Specify the DZ expectation 

MZ <- mxModel("MZ", obs, MZdata, fun, mzExp)                # The MZ model
DZ <- mxModel("DZ", obs, DZdata, fun, dzExp)                # The DZ model

aceFun <- mxFitFunctionMultigroup(c("MZ","DZ"))             # The multiple group fit function
ace <- mxModel("ACE", MZ, DZ, fun, aceFun)                  # The ACE model

aceFit <- mxRun(ace)                                        # Run the model
summary(aceFit)                                             # Look at the summary


# Remember these were the parameters that we initially used
#qnorm(1- .25)

# Va <- .33
# Vc <- .33
# Ve <- .34